diff --git a/bolt/test/X86/section-end-sym.s b/bolt/test/X86/section-end-sym.s index a9bca5604ec16..38517bf7e0719 100644 --- a/bolt/test/X86/section-end-sym.s +++ b/bolt/test/X86/section-end-sym.s @@ -1,7 +1,7 @@ ## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as ## functions. -# REQUIRES: system-linux +# REQUIRES: system-linux, asserts # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o # RUN: ld.lld %t.o -o %t.exe -q diff --git a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp index b7eadb87b4fcd..c10c3652a153a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp @@ -58,11 +58,11 @@ class UseAfterMoveFinder { public: UseAfterMoveFinder(ASTContext *TheContext); - // Within the given function body, finds the first use of 'MovedVariable' that + // Within the given code block, finds the first use of 'MovedVariable' that // occurs after 'MovingCall' (the expression that performs the move). If a // use-after-move is found, writes information about it to 'TheUseAfterMove'. // Returns whether a use-after-move was found. - bool find(Stmt *FunctionBody, const Expr *MovingCall, + bool find(Stmt *CodeBlock, const Expr *MovingCall, const ValueDecl *MovedVariable, UseAfterMove *TheUseAfterMove); private: @@ -104,7 +104,7 @@ static StatementMatcher inDecltypeOrTemplateArg() { UseAfterMoveFinder::UseAfterMoveFinder(ASTContext *TheContext) : Context(TheContext) {} -bool UseAfterMoveFinder::find(Stmt *FunctionBody, const Expr *MovingCall, +bool UseAfterMoveFinder::find(Stmt *CodeBlock, const Expr *MovingCall, const ValueDecl *MovedVariable, UseAfterMove *TheUseAfterMove) { // Generate the CFG manually instead of through an AnalysisDeclContext because @@ -118,12 +118,11 @@ bool UseAfterMoveFinder::find(Stmt *FunctionBody, const Expr *MovingCall, Options.AddImplicitDtors = true; Options.AddTemporaryDtors = true; std::unique_ptr TheCFG = - CFG::buildCFG(nullptr, FunctionBody, Context, Options); + CFG::buildCFG(nullptr, CodeBlock, Context, Options); if (!TheCFG) return false; - Sequence = - std::make_unique(TheCFG.get(), FunctionBody, Context); + Sequence = std::make_unique(TheCFG.get(), CodeBlock, Context); BlockMap = std::make_unique(TheCFG.get(), Context); Visited.clear(); @@ -398,20 +397,28 @@ static void emitDiagnostic(const Expr *MovingCall, const DeclRefExpr *MoveArg, } void UseAfterMoveCheck::registerMatchers(MatchFinder *Finder) { + // try_emplace is a common maybe-moving function that returns a + // bool to tell callers whether it moved. Ignore std::move inside + // try_emplace to avoid false positives as we don't track uses of + // the bool. + auto TryEmplaceMatcher = + cxxMemberCallExpr(callee(cxxMethodDecl(hasName("try_emplace")))); auto CallMoveMatcher = - callExpr(callee(functionDecl(hasName("::std::move"))), argumentCountIs(1), + callExpr(argumentCountIs(1), callee(functionDecl(hasName("::std::move"))), hasArgument(0, declRefExpr().bind("arg")), + unless(inDecltypeOrTemplateArg()), + unless(hasParent(TryEmplaceMatcher)), expr().bind("call-move"), anyOf(hasAncestor(compoundStmt( hasParent(lambdaExpr().bind("containing-lambda")))), - hasAncestor(functionDecl().bind("containing-func"))), - unless(inDecltypeOrTemplateArg()), - // try_emplace is a common maybe-moving function that returns a - // bool to tell callers whether it moved. 
Ignore std::move inside - // try_emplace to avoid false positives as we don't track uses of - // the bool. - unless(hasParent(cxxMemberCallExpr( - callee(cxxMethodDecl(hasName("try_emplace"))))))) - .bind("call-move"); + hasAncestor(functionDecl(anyOf( + cxxConstructorDecl( + hasAnyConstructorInitializer(withInitializer( + expr(anyOf(equalsBoundNode("call-move"), + hasDescendant(expr( + equalsBoundNode("call-move"))))) + .bind("containing-ctor-init")))) + .bind("containing-ctor"), + functionDecl().bind("containing-func")))))); Finder->addMatcher( traverse( @@ -434,6 +441,10 @@ void UseAfterMoveCheck::registerMatchers(MatchFinder *Finder) { } void UseAfterMoveCheck::check(const MatchFinder::MatchResult &Result) { + const auto *ContainingCtor = + Result.Nodes.getNodeAs("containing-ctor"); + const auto *ContainingCtorInit = + Result.Nodes.getNodeAs("containing-ctor-init"); const auto *ContainingLambda = Result.Nodes.getNodeAs("containing-lambda"); const auto *ContainingFunc = @@ -445,23 +456,38 @@ void UseAfterMoveCheck::check(const MatchFinder::MatchResult &Result) { if (!MovingCall || !MovingCall->getExprLoc().isValid()) MovingCall = CallMove; - Stmt *FunctionBody = nullptr; - if (ContainingLambda) - FunctionBody = ContainingLambda->getBody(); - else if (ContainingFunc) - FunctionBody = ContainingFunc->getBody(); - else - return; - // Ignore the std::move if the variable that was passed to it isn't a local // variable. if (!Arg->getDecl()->getDeclContext()->isFunctionOrMethod()) return; - UseAfterMoveFinder Finder(Result.Context); - UseAfterMove Use; - if (Finder.find(FunctionBody, MovingCall, Arg->getDecl(), &Use)) - emitDiagnostic(MovingCall, Arg, Use, this, Result.Context); + // Collect all code blocks that could use the arg after move. + llvm::SmallVector CodeBlocks{}; + if (ContainingCtor) { + CodeBlocks.push_back(ContainingCtor->getBody()); + if (ContainingCtorInit) { + // Collect the constructor initializer expressions. 
+ bool BeforeMove{true}; + for (CXXCtorInitializer *Init : ContainingCtor->inits()) { + if (BeforeMove && Init->getInit()->IgnoreImplicit() == + ContainingCtorInit->IgnoreImplicit()) + BeforeMove = false; + if (!BeforeMove) + CodeBlocks.push_back(Init->getInit()); + } + } + } else if (ContainingLambda) { + CodeBlocks.push_back(ContainingLambda->getBody()); + } else if (ContainingFunc) { + CodeBlocks.push_back(ContainingFunc->getBody()); + } + + for (Stmt *CodeBlock : CodeBlocks) { + UseAfterMoveFinder Finder(Result.Context); + UseAfterMove Use; + if (Finder.find(CodeBlock, MovingCall, Arg->getDecl(), &Use)) + emitDiagnostic(MovingCall, Arg, Use, this, Result.Context); + } } } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp index c5bd6055072aa..d522d6760af1d 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp @@ -47,16 +47,19 @@ class AvoidUnderscoreInGoogletestNameCallback : public PPCallbacks { if (!isGoogletestTestMacro(MacroName) || !Args || Args->getNumMacroArguments() < 2) return; - const Token *TestCaseNameToken = Args->getUnexpArgument(0); + const Token *TestSuiteNameToken = Args->getUnexpArgument(0); const Token *TestNameToken = Args->getUnexpArgument(1); - if (!TestCaseNameToken || !TestNameToken) + if (!TestSuiteNameToken || !TestNameToken) return; - std::string TestCaseName = PP->getSpelling(*TestCaseNameToken); - if (TestCaseName.find('_') != std::string::npos) - Check->diag(TestCaseNameToken->getLocation(), - "avoid using \"_\" in test case name \"%0\" according to " + std::string TestSuiteNameMaybeDisabled = + PP->getSpelling(*TestSuiteNameToken); + StringRef TestSuiteName = TestSuiteNameMaybeDisabled; + TestSuiteName.consume_front(KDisabledTestPrefix); + if (TestSuiteName.contains('_')) + Check->diag(TestSuiteNameToken->getLocation(), + "avoid using \"_\" in test suite name \"%0\" according to " "Googletest FAQ") - << TestCaseName; + << TestSuiteName; std::string TestNameMaybeDisabled = PP->getSpelling(*TestNameToken); StringRef TestName = TestNameMaybeDisabled; diff --git a/clang-tools-extra/clangd/CollectMacros.cpp b/clang-tools-extra/clangd/CollectMacros.cpp index 687f86e0a77eb..c0ed8b68ea481 100644 --- a/clang-tools-extra/clangd/CollectMacros.cpp +++ b/clang-tools-extra/clangd/CollectMacros.cpp @@ -9,12 +9,13 @@ #include "CollectMacros.h" #include "AST.h" #include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/STLExtras.h" namespace clang { namespace clangd { void CollectMainFileMacros::add(const Token &MacroNameTok, const MacroInfo *MI, - bool IsDefinition) { + bool IsDefinition, bool InIfCondition) { if (!InMainFile) return; auto Loc = MacroNameTok.getLocation(); @@ -26,9 +27,49 @@ void CollectMainFileMacros::add(const Token &MacroNameTok, const MacroInfo *MI, auto Range = halfOpenToRange( SM, CharSourceRange::getCharRange(Loc, MacroNameTok.getEndLoc())); if (auto SID = getSymbolID(Name, MI, SM)) - Out.MacroRefs[SID].push_back({Range, IsDefinition}); + Out.MacroRefs[SID].push_back({Range, IsDefinition, InIfCondition}); else - Out.UnknownMacros.push_back({Range, IsDefinition}); + Out.UnknownMacros.push_back({Range, IsDefinition, InIfCondition}); +} + +void CollectMainFileMacros::FileChanged(SourceLocation Loc, FileChangeReason, + SrcMgr::CharacteristicKind, FileID) { + InMainFile = 
isInsideMainFile(Loc, SM); +} +void CollectMainFileMacros::MacroExpands(const Token &MacroName, + const MacroDefinition &MD, + SourceRange Range, + const MacroArgs *Args) { + add(MacroName, MD.getMacroInfo()); +} +void CollectMainFileMacros::MacroUndefined(const clang::Token &MacroName, + const clang::MacroDefinition &MD, + const clang::MacroDirective *Undef) { + add(MacroName, MD.getMacroInfo()); +} +void CollectMainFileMacros::Ifdef(SourceLocation Loc, const Token &MacroName, + const MacroDefinition &MD) { + add(MacroName, MD.getMacroInfo(), /*IsDefinition=*/false, + /*InConditionalDirective=*/true); +} +void CollectMainFileMacros::Ifndef(SourceLocation Loc, const Token &MacroName, + const MacroDefinition &MD) { + add(MacroName, MD.getMacroInfo(), /*IsDefinition=*/false, + /*InConditionalDirective=*/true); +} +void CollectMainFileMacros::Defined(const Token &MacroName, + const MacroDefinition &MD, + SourceRange Range) { + add(MacroName, MD.getMacroInfo(), /*IsDefinition=*/false, + /*InConditionalDirective=*/true); +} +void CollectMainFileMacros::SourceRangeSkipped(SourceRange R, + SourceLocation EndifLoc) { + if (!InMainFile) + return; + Position Begin = sourceLocToPosition(SM, R.getBegin()); + Position End = sourceLocToPosition(SM, R.getEnd()); + Out.SkippedRanges.push_back(Range{Begin, End}); } class CollectPragmaMarks : public PPCallbacks { @@ -58,5 +99,24 @@ collectPragmaMarksCallback(const SourceManager &SM, return std::make_unique(SM, Out); } +void CollectMainFileMacros::MacroDefined(const Token &MacroName, + const MacroDirective *MD) { + + if (!InMainFile) + return; + const auto *MI = MD->getMacroInfo(); + add(MacroName, MD->getMacroInfo(), true); + if (MI) + for (const auto &Tok : MI->tokens()) { + auto *II = Tok.getIdentifierInfo(); + // Could this token be a reference to a macro? (Not param to this macro). + if (!II || !II->hadMacroDefinition() || + llvm::is_contained(MI->params(), II)) + continue; + if (const MacroInfo *MI = PP.getMacroInfo(II)) + add(Tok, MI); + } +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/CollectMacros.h b/clang-tools-extra/clangd/CollectMacros.h index 9d7b478f1c3c7..d5789a2a88912 100644 --- a/clang-tools-extra/clangd/CollectMacros.h +++ b/clang-tools-extra/clangd/CollectMacros.h @@ -13,6 +13,7 @@ #include "SourceCode.h" #include "index/SymbolID.h" #include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/ADT/DenseMap.h" #include @@ -24,6 +25,8 @@ struct MacroOccurrence { // SourceManager from preamble is not available when we build the AST. Range Rng; bool IsDefinition; + // True if the occurence is used in a conditional directive, e.g. 
#ifdef MACRO + bool InConditionalDirective; }; struct MainFileMacros { @@ -43,56 +46,37 @@ struct MainFileMacros { /// - collect macros after the preamble of the main file (in ParsedAST.cpp) class CollectMainFileMacros : public PPCallbacks { public: - explicit CollectMainFileMacros(const SourceManager &SM, MainFileMacros &Out) - : SM(SM), Out(Out) {} + explicit CollectMainFileMacros(const Preprocessor &PP, MainFileMacros &Out) + : SM(PP.getSourceManager()), PP(PP), Out(Out) {} void FileChanged(SourceLocation Loc, FileChangeReason, - SrcMgr::CharacteristicKind, FileID) override { - InMainFile = isInsideMainFile(Loc, SM); - } + SrcMgr::CharacteristicKind, FileID) override; - void MacroDefined(const Token &MacroName, const MacroDirective *MD) override { - add(MacroName, MD->getMacroInfo(), /*IsDefinition=*/true); - } + void MacroDefined(const Token &MacroName, const MacroDirective *MD) override; void MacroExpands(const Token &MacroName, const MacroDefinition &MD, - SourceRange Range, const MacroArgs *Args) override { - add(MacroName, MD.getMacroInfo()); - } + SourceRange Range, const MacroArgs *Args) override; void MacroUndefined(const clang::Token &MacroName, const clang::MacroDefinition &MD, - const clang::MacroDirective *Undef) override { - add(MacroName, MD.getMacroInfo()); - } + const clang::MacroDirective *Undef) override; + // FIXME: handle C++23 #elifdef, #elifndef void Ifdef(SourceLocation Loc, const Token &MacroName, - const MacroDefinition &MD) override { - add(MacroName, MD.getMacroInfo()); - } - + const MacroDefinition &MD) override; void Ifndef(SourceLocation Loc, const Token &MacroName, - const MacroDefinition &MD) override { - add(MacroName, MD.getMacroInfo()); - } + const MacroDefinition &MD) override; void Defined(const Token &MacroName, const MacroDefinition &MD, - SourceRange Range) override { - add(MacroName, MD.getMacroInfo()); - } - - void SourceRangeSkipped(SourceRange R, SourceLocation EndifLoc) override { - if (!InMainFile) - return; - Position Begin = sourceLocToPosition(SM, R.getBegin()); - Position End = sourceLocToPosition(SM, R.getEnd()); - Out.SkippedRanges.push_back(Range{Begin, End}); - } + SourceRange Range) override; + + void SourceRangeSkipped(SourceRange R, SourceLocation EndifLoc) override; private: void add(const Token &MacroNameTok, const MacroInfo *MI, - bool IsDefinition = false); + bool IsDefinition = false, bool InConditionalDirective = false); const SourceManager &SM; + const Preprocessor &PP; bool InMainFile = true; MainFileMacros &Out; }; diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index c5436141adbf7..e240c22259f35 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -12,11 +12,16 @@ #include "CodeCompletionStrings.h" #include "Config.h" #include "FindTarget.h" +#include "IncludeCleaner.h" #include "ParsedAST.h" #include "Selection.h" #include "SourceCode.h" +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Types.h" #include "index/SymbolCollector.h" +#include "support/Logger.h" #include "support/Markup.h" +#include "support/Trace.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/ASTTypeTraits.h" @@ -43,11 +48,13 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include #include +#include namespace 
clang { namespace clangd { @@ -1084,6 +1091,49 @@ const NamedDecl *pickDeclToUse(llvm::ArrayRef Candidates) { return Candidates.front(); } +void maybeAddSymbolProviders(ParsedAST &AST, HoverInfo &HI, + include_cleaner::Symbol Sym) { + trace::Span Tracer("Hover::maybeAddSymbolProviders"); + + const SourceManager &SM = AST.getSourceManager(); + llvm::SmallVector RankedProviders = + include_cleaner::headersForSymbol(Sym, SM, AST.getPragmaIncludes()); + if (RankedProviders.empty()) + return; + + std::string Result; + include_cleaner::Includes ConvertedIncludes = + convertIncludes(SM, AST.getIncludeStructure().MainFileIncludes); + for (const auto &P : RankedProviders) { + if (P.kind() == include_cleaner::Header::Physical && + P.physical() == SM.getFileEntryForID(SM.getMainFileID())) + // Main file ranked higher than any #include'd file + break; + + // Pick the best-ranked #include'd provider + auto Matches = ConvertedIncludes.match(P); + if (!Matches.empty()) { + Result = Matches[0]->quote(); + break; + } + } + + if (!Result.empty()) { + HI.Provider = std::move(Result); + return; + } + + // Pick the best-ranked non-#include'd provider + const auto &H = RankedProviders.front(); + if (H.kind() == include_cleaner::Header::Physical && + H.physical() == SM.getFileEntryForID(SM.getMainFileID())) + // Do not show main file as provider, otherwise we'll show provider info + // on local variables, etc. + return; + + HI.Provider = spellHeader(AST, SM.getFileEntryForID(SM.getMainFileID()), H); +} + } // namespace std::optional getHover(ParsedAST &AST, Position Pos, @@ -1131,6 +1181,12 @@ std::optional getHover(ParsedAST &AST, Position Pos, HighlightRange = Tok.range(SM).toCharRange(SM); if (auto M = locateMacroAt(Tok, AST.getPreprocessor())) { HI = getHoverContents(*M, Tok, AST); + if (auto DefLoc = M->Info->getDefinitionLoc(); DefLoc.isValid()) { + include_cleaner::Macro IncludeCleanerMacro{ + AST.getPreprocessor().getIdentifierInfo(Tok.text(SM)), DefLoc}; + maybeAddSymbolProviders(AST, *HI, + include_cleaner::Symbol{IncludeCleanerMacro}); + } break; } } else if (Tok.kind() == tok::kw_auto || Tok.kind() == tok::kw_decltype) { @@ -1168,6 +1224,7 @@ std::optional getHover(ParsedAST &AST, Position Pos, if (!HI->Value) HI->Value = printExprValue(N, AST.getASTContext()); maybeAddCalleeArgInfo(N, *HI, PP); + maybeAddSymbolProviders(AST, *HI, include_cleaner::Symbol{*DeclToUse}); } else if (const Expr *E = N->ASTNode.get()) { HI = getHoverContents(N, E, AST, PP, Index); } else if (const Attr *A = N->ASTNode.get()) { @@ -1217,6 +1274,14 @@ markup::Document HoverInfo::present() const { assert(!Name.empty() && "hover triggered on a nameless symbol"); Header.appendCode(Name); + if (!Provider.empty()) { + markup::Paragraph &DI = Output.addParagraph(); + DI.appendText("provided by"); + DI.appendSpace(); + DI.appendCode(Provider); + Output.addRuler(); + } + // Put a linebreak after header to increase readability. Output.addRuler(); // Print Types on their own lines to reduce chances of getting line-wrapped by diff --git a/clang-tools-extra/clangd/Hover.h b/clang-tools-extra/clangd/Hover.h index e63ff95b400b3..7ade177f89cc1 100644 --- a/clang-tools-extra/clangd/Hover.h +++ b/clang-tools-extra/clangd/Hover.h @@ -14,6 +14,7 @@ #include "support/Markup.h" #include "clang/Index/IndexSymbol.h" #include +#include namespace clang { namespace clangd { @@ -67,6 +68,8 @@ struct HoverInfo { std::string LocalScope; /// Name of the symbol, does not contain any "::". std::string Name; + /// Header providing the symbol (best match). 
Contains ""<>. + std::string Provider; std::optional SymRange; index::SymbolKind Kind = index::SymbolKind::Unknown; std::string Documentation; diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp index 98135529f259b..ab7c05eb834c0 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -93,8 +93,6 @@ bool isFilteredByConfig(const Config &Cfg, llvm::StringRef HeaderPath) { static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, const Config &Cfg, const include_cleaner::PragmaIncludes *PI) { - if (PI && PI->shouldKeep(Inc.HashLine + 1)) - return false; // FIXME(kirillbobyrev): We currently do not support the umbrella headers. // System headers are likely to be standard library headers. // Until we have good support for umbrella headers, don't warn about them. @@ -108,6 +106,20 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, auto FE = AST.getSourceManager().getFileManager().getFileRef( AST.getIncludeStructure().getRealPath(HID)); assert(FE); + if (PI) { + if (PI->shouldKeep(Inc.HashLine + 1)) + return false; + // Check if main file is the public interface for a private header. If so we + // shouldn't diagnose it as unused. + if(auto PHeader = PI->getPublic(*FE); !PHeader.empty()) { + PHeader = PHeader.trim("<>\""); + // Since most private -> public mappings happen in a verbatim way, we + // check textually here. This might go wrong in presence of symlinks or + // header mappings. But that's not different than rest of the places. + if(AST.tuPath().endswith(PHeader)) + return false; + } + } // Headers without include guards have side effects and are not // self-contained, skip them. if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded( @@ -124,45 +136,6 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, return true; } -include_cleaner::Includes -convertIncludes(const SourceManager &SM, - const llvm::ArrayRef MainFileIncludes) { - include_cleaner::Includes Includes; - for (const Inclusion &Inc : MainFileIncludes) { - include_cleaner::Include TransformedInc; - llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written); - TransformedInc.Spelled = WrittenRef.trim("\"<>"); - TransformedInc.HashLocation = - SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset); - TransformedInc.Line = Inc.HashLine + 1; - TransformedInc.Angled = WrittenRef.starts_with("<"); - auto FE = SM.getFileManager().getFile(Inc.Resolved); - if (!FE) { - elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}", - Inc.Resolved, FE.getError().message()); - continue; - } - TransformedInc.Resolved = *FE; - Includes.add(std::move(TransformedInc)); - } - return Includes; -} - -std::string spellHeader(ParsedAST &AST, const FileEntry *MainFile, - include_cleaner::Header Provider) { - if (Provider.kind() == include_cleaner::Header::Physical) { - if (auto CanonicalPath = - getCanonicalPath(Provider.physical(), AST.getSourceManager())) { - std::string SpelledHeader = - llvm::cantFail(URI::includeSpelling(URI::create(*CanonicalPath))); - if (!SpelledHeader.empty()) - return SpelledHeader; - } - } - return include_cleaner::spellHeader( - Provider, AST.getPreprocessor().getHeaderSearchInfo(), MainFile); -} - std::vector collectMacroReferences(ParsedAST &AST) { const auto &SM = AST.getSourceManager(); @@ -315,6 +288,44 @@ std::vector generateUnusedIncludeDiagnostics( } } // namespace +include_cleaner::Includes +convertIncludes(const SourceManager 
&SM, + const llvm::ArrayRef Includes) { + include_cleaner::Includes ConvertedIncludes; + for (const Inclusion &Inc : Includes) { + include_cleaner::Include TransformedInc; + llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written); + TransformedInc.Spelled = WrittenRef.trim("\"<>"); + TransformedInc.HashLocation = + SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset); + TransformedInc.Line = Inc.HashLine + 1; + TransformedInc.Angled = WrittenRef.starts_with("<"); + auto FE = SM.getFileManager().getFile(Inc.Resolved); + if (!FE) { + elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}", + Inc.Resolved, FE.getError().message()); + continue; + } + TransformedInc.Resolved = *FE; + ConvertedIncludes.add(std::move(TransformedInc)); + } + return ConvertedIncludes; +} + +std::string spellHeader(ParsedAST &AST, const FileEntry *MainFile, + include_cleaner::Header Provider) { + if (Provider.kind() == include_cleaner::Header::Physical) { + if (auto CanonicalPath = + getCanonicalPath(Provider.physical(), AST.getSourceManager())) { + std::string SpelledHeader = + llvm::cantFail(URI::includeSpelling(URI::create(*CanonicalPath))); + if (!SpelledHeader.empty()) + return SpelledHeader; + } + } + return include_cleaner::spellHeader( + Provider, AST.getPreprocessor().getHeaderSearchInfo(), MainFile); +} std::vector getUnused(ParsedAST &AST, diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h index d7edca035c965..1a5f07869d569 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -68,6 +68,16 @@ std::vector issueIncludeCleanerDiagnostics(ParsedAST &AST, /// FIXME: remove this hack once the implementation is good enough. void setIncludeCleanerAnalyzesStdlib(bool B); +/// Converts the clangd include representation to include-cleaner +/// include representation. +include_cleaner::Includes +convertIncludes(const SourceManager &SM, + const llvm::ArrayRef Includes); + +/// Determines the header spelling of an include-cleaner header +/// representation. The spelling contains the ""<> characters. +std::string spellHeader(ParsedAST &AST, const FileEntry *MainFile, + include_cleaner::Header Provider); } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 1671eec133b6e..1501a5c5f3c3b 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -610,11 +610,12 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, Macros = Patch->mainFileMacros(); Marks = Patch->marks(); } - Clang->getPreprocessor().addPPCallbacks( - std::make_unique(Clang->getSourceManager(), - Macros)); + auto& PP = Clang->getPreprocessor(); + PP.addPPCallbacks( + std::make_unique( + PP, Macros)); - Clang->getPreprocessor().addPPCallbacks( + PP.addPPCallbacks( collectPragmaMarksCallback(Clang->getSourceManager(), Marks)); // Copy over the includes from the preamble, then combine with the @@ -626,10 +627,10 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, CanonIncludes.addSystemHeadersMapping(Clang->getLangOpts()); std::unique_ptr IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes); - Clang->getPreprocessor().addCommentHandler(IWYUHandler.get()); + PP.addCommentHandler(IWYUHandler.get()); // Collect tokens of the main file. 
- syntax::TokenCollector CollectTokens(Clang->getPreprocessor()); + syntax::TokenCollector CollectTokens(PP); // To remain consistent with preamble builds, these callbacks must be called // exactly here, after preprocessor is initialized and BeginSourceFile() was @@ -660,7 +661,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, // XXX: This is messy: clang-tidy checks flush some diagnostics at EOF. // However Action->EndSourceFile() would destroy the ASTContext! // So just inform the preprocessor of EOF, while keeping everything alive. - Clang->getPreprocessor().EndSourceFile(); + PP.EndSourceFile(); // UnitDiagsConsumer is local, we can not store it in CompilerInstance that // has a longer lifetime. Clang->getDiagnostics().setClient(new IgnoreDiagnostics); diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 3b0af0ab50a62..08662697a4a5c 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -133,22 +133,19 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { CanonIncludes.addSystemHeadersMapping(CI.getLangOpts()); LangOpts = &CI.getLangOpts(); SourceMgr = &CI.getSourceManager(); + PP = &CI.getPreprocessor(); Includes.collect(CI); - if (Config::current().Diagnostics.UnusedIncludes == - Config::IncludesPolicy::Strict || - Config::current().Diagnostics.MissingIncludes == - Config::IncludesPolicy::Strict) - Pragmas.record(CI); + Pragmas.record(CI); if (BeforeExecuteCallback) BeforeExecuteCallback(CI); } std::unique_ptr createPPCallbacks() override { - assert(SourceMgr && LangOpts && - "SourceMgr and LangOpts must be set at this point"); + assert(SourceMgr && LangOpts && PP && + "SourceMgr, LangOpts and PP must be set at this point"); return std::make_unique( - std::make_unique(*SourceMgr, Macros), + std::make_unique(*PP, Macros), collectPragmaMarksCallback(*SourceMgr, Marks)); } @@ -215,6 +212,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { std::unique_ptr IWYUHandler = nullptr; const clang::LangOptions *LangOpts = nullptr; const SourceManager *SourceMgr = nullptr; + const Preprocessor *PP = nullptr; PreambleBuildStats *Stats; bool ParseForwardingFunctions; std::function BeforeExecuteCallback; @@ -382,7 +380,7 @@ scanPreamble(llvm::StringRef Contents, const tooling::CompileCommand &Cmd) { PP.addPPCallbacks( std::make_unique(PP, SP.TextualDirectives)); PP.addPPCallbacks(collectPragmaMarksCallback(SM, SP.Marks)); - PP.addPPCallbacks(std::make_unique(SM, SP.Macros)); + PP.addPPCallbacks(std::make_unique(PP, SP.Macros)); if (llvm::Error Err = Action.Execute()) return std::move(Err); Action.EndSourceFile(); diff --git a/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp b/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp index 196ed5cea4693..163a7f1a31707 100644 --- a/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp +++ b/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp @@ -8,12 +8,14 @@ #include "AST.h" #include "Annotations.h" #include "CollectMacros.h" +#include "Matchers.h" #include "SourceCode.h" #include "TestTU.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace clangd { @@ -21,19 +23,24 @@ namespace { using testing::UnorderedElementsAreArray; +MATCHER_P(rangeIs, R, "") { return arg.Rng == R; } +MATCHER(isDef, "") { return arg.IsDefinition; } +MATCHER(inConditionalDirective, "") { return 
arg.InConditionalDirective; } + TEST(CollectMainFileMacros, SelectedMacros) { // References of the same symbol must have the ranges with the same // name(integer). If there are N different symbols then they must be named // from 1 to N. Macros for which SymbolID cannot be computed must be named - // "Unknown". + // "Unknown". The payload of the annotation describes the extra bit + // information of the MacroOccurrence (e.g. $1(def) => IsDefinition). const char *Tests[] = { R"cpp(// Macros: Cursor on definition. - #define $1[[FOO]](x,y) (x + y) + #define $1(def)[[FOO]](x,y) (x + y) int main() { int x = $1[[FOO]]($1[[FOO]](3, 4), $1[[FOO]](5, 6)); } )cpp", R"cpp( - #define $1[[M]](X) X; - #define $2[[abc]] 123 + #define $1(def)[[M]](X) X; + #define $2(def)[[abc]] 123 int s = $1[[M]]($2[[abc]]); )cpp", // FIXME: Locating macro in duplicate definitions doesn't work. Enable @@ -48,31 +55,50 @@ TEST(CollectMainFileMacros, SelectedMacros) { // #undef $2[[abc]] // )cpp", R"cpp( - #ifdef $Unknown[[UNDEFINED]] + #ifdef $Unknown(condit)[[UNDEFINED]] + #endif + + #ifndef $Unknown(condit)[[UNDEFINED]] + #endif + + #if defined($Unknown(condit)[[UNDEFINED]]) #endif )cpp", R"cpp( - #ifndef $Unknown[[abc]] - #define $1[[abc]] - #ifdef $1[[abc]] + #ifndef $Unknown(condit)[[abc]] + #define $1(def)[[abc]] + #ifdef $1(condit)[[abc]] #endif #endif )cpp", R"cpp( // Macros from token concatenations not included. - #define $1[[CONCAT]](X) X##A() - #define $2[[PREPEND]](X) MACRO##X() - #define $3[[MACROA]]() 123 + #define $1(def)[[CONCAT]](X) X##A() + #define $2(def)[[PREPEND]](X) MACRO##X() + #define $3(def)[[MACROA]]() 123 int B = $1[[CONCAT]](MACRO); int D = $2[[PREPEND]](A); )cpp", R"cpp( - // FIXME: Macro names in a definition are not detected. - #define $1[[MACRO_ARGS2]](X, Y) X Y - #define $2[[FOO]] BAR - #define $3[[BAR]] 1 + #define $1(def)[[MACRO_ARGS2]](X, Y) X Y + #define $3(def)[[BAR]] 1 + #define $2(def)[[FOO]] $3[[BAR]] int A = $2[[FOO]]; )cpp"}; + auto ExpectedResults = [](const Annotations &T, StringRef Name) { + std::vector> ExpectedLocations; + for (const auto &[R, Bits] : T.rangesWithPayload(Name)) { + if (Bits == "def") + ExpectedLocations.push_back(testing::AllOf(rangeIs(R), isDef())); + else if (Bits == "condit") + ExpectedLocations.push_back( + testing::AllOf(rangeIs(R), inConditionalDirective())); + else + ExpectedLocations.push_back(testing::AllOf(rangeIs(R))); + } + return ExpectedLocations; + }; + for (const char *Test : Tests) { Annotations T(Test); auto AST = TestTU::withCode(T.code()).build(); @@ -80,13 +106,16 @@ TEST(CollectMainFileMacros, SelectedMacros) { auto &SM = AST.getSourceManager(); auto &PP = AST.getPreprocessor(); - // Known macros. 
- for (int I = 1;; I++) { - const auto ExpectedRefs = T.ranges(llvm::to_string(I)); - if (ExpectedRefs.empty()) - break; + for (const auto &[Name, Ranges] : T.all_ranges()) { + if (Name == "Unknown") { + EXPECT_THAT(ActualMacroRefs.UnknownMacros, + UnorderedElementsAreArray(ExpectedResults(T, "Unknown"))) + << "Unknown macros doesn't match in " << Test; + continue; + } - auto Loc = sourceLocationInMainFile(SM, ExpectedRefs.begin()->start); + auto Loc = sourceLocationInMainFile( + SM, offsetToPosition(T.code(), Ranges.front().Begin)); ASSERT_TRUE(bool(Loc)); const auto *Id = syntax::spelledIdentifierTouching(*Loc, AST.getTokens()); ASSERT_TRUE(Id); @@ -94,19 +123,11 @@ TEST(CollectMainFileMacros, SelectedMacros) { assert(Macro); auto SID = getSymbolID(Macro->Name, Macro->Info, SM); - std::vector Ranges; - for (const auto &Ref : ActualMacroRefs.MacroRefs[SID]) - Ranges.push_back(Ref.Rng); - EXPECT_THAT(ExpectedRefs, UnorderedElementsAreArray(Ranges)) - << "Annotation=" << I << ", MacroName=" << Macro->Name + EXPECT_THAT(ActualMacroRefs.MacroRefs[SID], + UnorderedElementsAreArray(ExpectedResults(T, Name))) + << "Annotation=" << Name << ", MacroName=" << Macro->Name << ", Test = " << Test; } - // Unknown macros. - std::vector Ranges; - for (const auto &Ref : AST.getMacros().UnknownMacros) - Ranges.push_back(Ref.Rng); - EXPECT_THAT(Ranges, UnorderedElementsAreArray(T.ranges("Unknown"))) - << "Unknown macros doesn't match in " << Test; } } } // namespace diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 211fd1311c98f..728f5444014dc 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -14,11 +14,12 @@ #include "TestTU.h" #include "index/MemIndex.h" #include "clang/AST/Attr.h" +#include "clang/Format/Format.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/StringRef.h" -#include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include #include @@ -28,6 +29,10 @@ namespace { using PassMode = HoverInfo::PassType::PassMode; +std::string guard(llvm::StringRef Code) { + return "#pragma once\n" + Code.str(); +} + TEST(Hover, Structured) { struct { const char *const Code; @@ -2882,6 +2887,99 @@ TEST(Hover, All) { } } +TEST(Hover, Providers) { + struct { + const char *Code; + const std::function ExpectedBuilder; + } Cases[] = {{R"cpp( + struct Foo {}; + Foo F = Fo^o{}; + )cpp", + [](HoverInfo &HI) { HI.Provider = ""; }}, + {R"cpp( + #include "foo.h" + Foo F = Fo^o{}; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #include "all.h" + Foo F = Fo^o{}; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #define FOO 5 + int F = ^FOO; + )cpp", + [](HoverInfo &HI) { HI.Provider = ""; }}, + {R"cpp( + #include "foo.h" + int F = ^FOO; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #include "all.h" + int F = ^FOO; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #include "foo.h" + Foo A; + Foo B; + Foo C = A ^+ B; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + // Hover selects the underlying decl of the using decl + {R"cpp( + #include "foo.h" + namespace ns { + using ::Foo; + } + ns::F^oo d; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}}; + + for (const auto &Case : Cases) { + Annotations Code{Case.Code}; + SCOPED_TRACE(Code.code()); + + TestTU TU; + TU.Filename = "foo.cpp"; + TU.Code = Code.code(); + TU.AdditionalFiles["foo.h"] = 
guard(R"cpp( + #define FOO 1 + class Foo {}; + Foo& operator+(const Foo, const Foo); + )cpp"); + TU.AdditionalFiles["all.h"] = guard("#include \"foo.h\""); + + auto AST = TU.build(); + auto H = getHover(AST, Code.point(), format::getLLVMStyle(), nullptr); + ASSERT_TRUE(H); + HoverInfo Expected; + Case.ExpectedBuilder(Expected); + SCOPED_TRACE(H->present().asMarkdown()); + EXPECT_EQ(H->Provider, Expected.Provider); + } +} + +TEST(Hover, ParseProviderInfo) { + HoverInfo HIFoo; + HIFoo.Name = "foo"; + HIFoo.Provider = "\"foo.h\""; + + HoverInfo HIFooBar; + HIFooBar.Name = "foo"; + HIFooBar.Provider = ""; + struct Case { + HoverInfo HI; + llvm::StringRef ExpectedMarkdown; + } Cases[] = {{HIFoo, "### `foo` \nprovided by `\"foo.h\"`"}, + {HIFooBar, "### `foo` \nprovided by ``"}}; + + for (const auto &Case : Cases) + EXPECT_EQ(Case.HI.present().asMarkdown(), Case.ExpectedMarkdown); +} + TEST(Hover, DocsFromIndex) { Annotations T(R"cpp( template class X {}; @@ -3359,8 +3457,8 @@ TEST(Hover, ParseDocumentation) { } } -// This is a separate test as headings don't create any differences in plaintext -// mode. +// This is a separate test as headings don't create any differences in +// plaintext mode. TEST(Hover, PresentHeadings) { HoverInfo HI; HI.Kind = index::SymbolKind::Variable; diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp index 409e3cee791c3..69b4e07439c38 100644 --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -30,6 +30,7 @@ #include "gtest/gtest.h" #include #include +#include #include namespace clang { @@ -328,6 +329,26 @@ TEST(IncludeCleaner, NoDiagsForObjC) { ParsedAST AST = TU.build(); EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty())); } + +TEST(IncludeCleaner, UmbrellaUsesPrivate) { + TestTU TU; + TU.Code = R"cpp( + #include "private.h" + )cpp"; + TU.AdditionalFiles["private.h"] = guard(R"cpp( + // IWYU pragma: private, include "public.h" + void foo() {} + )cpp"); + TU.Filename = "public.h"; + Config Cfg; + Cfg.Diagnostics.UnusedIncludes = Config::IncludesPolicy::Strict; + WithContextValue Ctx(Config::Key, std::move(Cfg)); + ParsedAST AST = TU.build(); + EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty())); + IncludeCleanerFindings Findings = computeIncludeCleanerFindings(AST); + EXPECT_THAT(Findings.UnusedIncludes, IsEmpty()); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 259efcf54a6b2..975378118b7ad 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -399,7 +399,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { #define $Macro_decl[[MACRO_CONCAT]](X, V, T) T foo##X = V #define $Macro_decl[[DEF_VAR]](X, V) int X = V #define $Macro_decl[[DEF_VAR_T]](T, X, V) T X = V - #define $Macro_decl[[DEF_VAR_REV]](V, X) DEF_VAR(X, V) + #define $Macro_decl[[DEF_VAR_REV]](V, X) $Macro[[DEF_VAR]](X, V) #define $Macro_decl[[CPY]](X) X #define $Macro_decl[[DEF_VAR_TYPE]](X, Y) X Y #define $Macro_decl[[SOME_NAME]] variable @@ -431,7 +431,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { )cpp", R"cpp( #define $Macro_decl[[fail]](expr) expr - #define $Macro_decl[[assert]](COND) if (!(COND)) { fail("assertion failed" #COND); } + #define 
$Macro_decl[[assert]](COND) if (!(COND)) { $Macro[[fail]]("assertion failed" #COND); } // Preamble ends. int $Variable_def[[x]]; int $Variable_def[[y]]; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 3f79e8e2a187a..a5f090045615c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -162,6 +162,10 @@ Changes in existing checks ` check. Global options of the same name should be used instead. +- Improved :doc:`bugprone-use-after-move + ` check to also cover constructor + initializers. + - Deprecated check-local options `HeaderFileExtensions` in :doc:`google-build-namespaces ` check. @@ -234,6 +238,10 @@ Changes in existing checks string for ``Prefix`` or ``Suffix`` options could result in the style not being used. +- Fixed an issue in :doc:`google-readability-avoid-underscore-in-googletest-name + ` when using + ``DISABLED_`` in the test suite name. + Removed checks ^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst b/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst index f2053b4d2fcd3..e667fd12222bb 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst @@ -3,8 +3,8 @@ google-readability-avoid-underscore-in-googletest-name ====================================================== -Checks whether there are underscores in googletest test and test case names in -test macros: +Checks whether there are underscores in googletest test suite names and test +names in test macros: - ``TEST`` - ``TEST_F`` @@ -18,17 +18,17 @@ For example: .. code-block:: c++ - TEST(TestCaseName, Illegal_TestName) {} - TEST(Illegal_TestCaseName, TestName) {} + TEST(TestSuiteName, Illegal_TestName) {} + TEST(Illegal_TestSuiteName, TestName) {} -would trigger the check. `Underscores are not allowed`_ in test names nor test -case names. +would trigger the check. `Underscores are not allowed`_ in test suite name nor +test names. -The ``DISABLED_`` prefix, which may be used to `disable individual tests`_, is -ignored when checking test names, but the rest of the rest of the test name is -still checked. +The ``DISABLED_`` prefix, which may be used to +`disable test suites and individual tests`_, is removed from the test suite name +and test name before checking for underscores. This check does not propose any fixes. .. _Underscores are not allowed: https://google.github.io/googletest/faq.html#why-should-test-suite-names-and-test-names-not-contain-underscore -.. _disable individual tests: https://google.github.io/googletest/advanced.html#temporarily-disabling-tests +.. 
_disable test suites and individual tests: https://google.github.io/googletest/advanced.html#temporarily-disabling-tests
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
index cd11700548075..66916a52046cb 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
@@ -16,11 +16,13 @@
 #include "clang/Format/Format.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MemoryBufferRef.h"
 #include

 namespace clang {
 class SourceLocation;
+class SourceManager;
 class Decl;
 class FileEntry;
 class HeaderSearch;
@@ -75,6 +77,14 @@ std::string fixIncludes(const AnalysisResults &Results, llvm::StringRef Code,

 std::string spellHeader(const Header &H, HeaderSearch &HS,
                         const FileEntry *Main);
+
+/// Gets all the providers for a symbol by traversing each location.
+/// Returned headers are sorted by relevance, first element is the most
+/// likely provider for the symbol.
+llvm::SmallVector<Header> headersForSymbol(const Symbol &S,
+                                           const SourceManager &SM,
+                                           const PragmaIncludes *PI);
+
 } // namespace include_cleaner
 } // namespace clang
diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
index 6237bdb46babf..fb0879b7aab63 100644
--- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -90,9 +90,25 @@ AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots,
   });

   AnalysisResults Results;
-  for (const Include &I : Inc.all())
-    if (!Used.contains(&I) && PI && !PI->shouldKeep(I.Line))
-      Results.Unused.push_back(&I);
+  for (const Include &I : Inc.all()) {
+    if (Used.contains(&I))
+      continue;
+    if (PI) {
+      if (PI->shouldKeep(I.Line))
+        continue;
+      // Check if main file is the public interface for a private header. If so
+      // we shouldn't diagnose it as unused.
+      if (auto PHeader = PI->getPublic(I.Resolved); !PHeader.empty()) {
+        PHeader = PHeader.trim("<>\"");
+        // Since most private -> public mappings happen in a verbatim way, we
+        // check textually here. This might go wrong in presence of symlinks or
+        // header mappings. But that's not different than rest of the places.
+        if (MainFile->tryGetRealPathName().endswith(PHeader))
+          continue;
+      }
+    }
+    Results.Unused.push_back(&I);
+  }
   for (llvm::StringRef S : Missing.keys())
     Results.Missing.push_back(S.str());
   llvm::sort(Results.Missing);
diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
index acf462919344b..6bfed91b584b3 100644
--- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
+++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
@@ -22,6 +22,7 @@
 #define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H

 #include "TypesInternal.h"
+#include "clang-include-cleaner/Analysis.h"
 #include "clang-include-cleaner/Record.h"
 #include "clang-include-cleaner/Types.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
@@ -58,13 +59,6 @@ llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
 /// A set of locations that provides the declaration.
 std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S);

-/// Gets all the providers for a symbol by traversing each location.
-/// Returned headers are sorted by relevance, first element is the most
-/// likely provider for the symbol.
-llvm::SmallVector<Header>
headersForSymbol(const Symbol &S, - const SourceManager &SM, - const PragmaIncludes *PI); - /// Write an HTML summary of the analysis to the given stream. void writeHTMLReport(FileID File, const Includes &, llvm::ArrayRef Roots, diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index c34c6c0a29a81..a2084d4f37903 100644 --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include +#include namespace clang::include_cleaner { namespace { @@ -212,17 +213,34 @@ int x = a + c; return std::make_unique(PP, PI); }; - TestAST AST(Inputs); - auto Decls = AST.context().getTranslationUnitDecl()->decls(); - auto Results = - analyze(std::vector{Decls.begin(), Decls.end()}, - PP.MacroReferences, PP.Includes, &PI, AST.sourceManager(), - AST.preprocessor().getHeaderSearchInfo()); + { + TestAST AST(Inputs); + auto Decls = AST.context().getTranslationUnitDecl()->decls(); + auto Results = + analyze(std::vector{Decls.begin(), Decls.end()}, + PP.MacroReferences, PP.Includes, &PI, AST.sourceManager(), + AST.preprocessor().getHeaderSearchInfo()); + + const Include *B = PP.Includes.atLine(3); + ASSERT_EQ(B->Spelled, "b.h"); + EXPECT_THAT(Results.Missing, ElementsAre("\"c.h\"")); + EXPECT_THAT(Results.Unused, ElementsAre(B)); + } - const Include *B = PP.Includes.atLine(3); - ASSERT_EQ(B->Spelled, "b.h"); - EXPECT_THAT(Results.Missing, ElementsAre("\"c.h\"")); - EXPECT_THAT(Results.Unused, ElementsAre(B)); + // Check that umbrella header uses private include. + { + Inputs.Code = R"cpp(#include "private.h")cpp"; + Inputs.ExtraFiles["private.h"] = + guard("// IWYU pragma: private, include \"public.h\""); + Inputs.FileName = "public.h"; + PP.Includes = {}; + PI = {}; + TestAST AST(Inputs); + EXPECT_FALSE(PP.Includes.all().empty()); + auto Results = analyze({}, {}, PP.Includes, &PI, AST.sourceManager(), + AST.preprocessor().getHeaderSearchInfo()); + EXPECT_THAT(Results.Unused, testing::IsEmpty()); + } } TEST(FixIncludes, Basic) { diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp index 45cef8abfd1f6..1e0831048dbd4 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp @@ -369,6 +369,18 @@ void lambdas() { }; a.foo(); } + // Don't warn if 'a' is a copy inside a synchronous lambda + { + A a; + A copied{[a] mutable { return std::move(a); }()}; + a.foo(); + } + // False negative (should warn if 'a' is a ref inside a synchronous lambda) + { + A a; + A moved{[&a] mutable { return std::move(a); }()}; + a.foo(); + } // Warn if the use consists of a capture that happens after a move. 
{ A a; @@ -1367,6 +1379,120 @@ void typeId() { } } // namespace UnevalContext +class CtorInit { +public: + CtorInit(std::string val) + : a{val.empty()}, // fine + s{std::move(val)}, + b{val.empty()} + // CHECK-NOTES: [[@LINE-1]]:11: warning: 'val' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:9: note: move occurred here + {} + +private: + bool a; + std::string s; + bool b; +}; + +class CtorInitLambda { +public: + CtorInitLambda(std::string val) + : a{val.empty()}, // fine + s{std::move(val)}, + b{[&] { return val.empty(); }()}, + // CHECK-NOTES: [[@LINE-1]]:12: warning: 'val' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:9: note: move occurred here + c{[] { + std::string str{}; + std::move(str); + return str.empty(); + // CHECK-NOTES: [[@LINE-1]]:18: warning: 'str' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:11: note: move occurred here + }()} { + std::move(val); + // CHECK-NOTES: [[@LINE-1]]:15: warning: 'val' used after it was moved + // CHECK-NOTES: [[@LINE-13]]:9: note: move occurred here + std::string val2{}; + std::move(val2); + val2.empty(); + // CHECK-NOTES: [[@LINE-1]]:5: warning: 'val2' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:5: note: move occurred here + } + +private: + bool a; + std::string s; + bool b; + bool c; + bool d{}; +}; + +class CtorInitOrder { +public: + CtorInitOrder(std::string val) + : a{val.empty()}, // fine + b{val.empty()}, + // CHECK-NOTES: [[@LINE-1]]:11: warning: 'val' used after it was moved + s{std::move(val)} {} // wrong order + // CHECK-NOTES: [[@LINE-1]]:9: note: move occurred here + // CHECK-NOTES: [[@LINE-4]]:11: note: the use happens in a later loop iteration than the move + +private: + bool a; + std::string s; + bool b; +}; + +struct Obj {}; +struct CtorD { + CtorD(Obj b); +}; + +struct CtorC { + CtorC(Obj b); +}; + +struct CtorB { + CtorB(Obj &b); +}; + +struct CtorA : CtorB, CtorC, CtorD { + CtorA(Obj b) : CtorB{b}, CtorC{std::move(b)}, CtorD{b} {} + // CHECK-NOTES: [[@LINE-1]]:55: warning: 'b' used after it was moved + // CHECK-NOTES: [[@LINE-2]]:34: note: move occurred here +}; + +struct Base { + Base(Obj b) : bb{std::move(b)} {} + template Base(Call &&c) : bb{c()} {}; + + Obj bb; +}; + +struct Derived : Base, CtorC { + Derived(Obj b) + : Base{[&] mutable { return std::move(b); }()}, + // False negative: The lambda/std::move was executed, so it should warn + // below + CtorC{b} {} +}; + +struct Derived2 : Base, CtorC { + Derived2(Obj b) + : Base{[&] mutable { return std::move(b); }}, + // This was a move, but it doesn't warn below, because it can't know if + // the lambda/std::move was actually called + CtorC{b} {} +}; + +struct Derived3 : Base, CtorC { + Derived3(Obj b) + : Base{[c = std::move(b)] mutable { return std::move(c); }}, CtorC{b} {} + // CHECK-NOTES: [[@LINE-1]]:74: warning: 'b' used after it was moved + // CHECK-NOTES: [[@LINE-2]]:19: note: move occurred here +}; + class PR38187 { public: PR38187(std::string val) : val_(std::move(val)) { diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp new file mode 100644 index 0000000000000..0e43735c2105c --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp @@ -0,0 +1,118 @@ +// RUN: %check_clang_tidy %s google-readability-avoid-underscore-in-googletest-name %t + +#define TEST(test_suite_name, test_name) void test_suite_name##test_name() +#define 
TEST_F(test_suite_name, test_name) void test_suite_name##test_name() +#define TEST_P(test_suite_name, test_name) void test_suite_name##test_name() +#define TYPED_TEST(test_suite_name, test_name) void test_suite_name##test_name() +#define TYPED_TEST_P(test_suite_name, test_name) void test_suite_name##test_name() +#define FRIEND_TEST(test_suite_name, test_name) void test_suite_name##test_name() + +TEST(TestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST(TestSuiteName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(TestSuiteName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(Illegal_TestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test suite name "Illegal_TestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(Illegal_Test_SuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test suite name "Illegal_Test_SuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(Illegal_TestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test suite name "Illegal_TestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:29: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_F(TestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_F(TestSuiteFixtureName, DISABLED_Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_F(TestSuiteFixtureName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_F(Illegal_TestSuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_TestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_F(Illegal_TestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_TestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:38: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_F(Illegal_Test_SuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_Test_SuiteFixtureName" according to Googletest FAQ 
[google-readability-avoid-underscore-in-googletest-name] + +TEST_P(ParameterizedTestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:43: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_P(ParameterizedTestSuiteFixtureName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:43: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_P(ParameterizedTestSuiteFixtureName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:43: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_P(Illegal_ParameterizedTestSuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_ParameterizedTestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_P(Illegal_ParameterizedTestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_ParameterizedTestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:51: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_P(Illegal_Parameterized_TestSuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_Parameterized_TestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST(TypedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST(TypedTestSuiteName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST(TypedTestSuiteName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST(Illegal_TypedTestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test suite name "Illegal_TypedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST(Illegal_TypedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test suite name "Illegal_TypedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:40: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST(Illegal_Typed_TestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test suite name "Illegal_Typed_TestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST_P(TypeParameterizedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: 
:[[@LINE-1]]:46: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST_P(TypeParameterizedTestSuiteName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:46: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST_P(TypeParameterizedTestSuiteName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:46: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST_P(Illegal_TypeParameterizedTestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test suite name "Illegal_TypeParameterizedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST_P(Illegal_TypeParameterizedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test suite name "Illegal_TypeParameterizedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:54: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST_P(Illegal_Type_ParameterizedTestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test suite name "Illegal_Type_ParameterizedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +// Underscores are allowed to disable a test with the DISABLED_ prefix. +// https://google.github.io/googletest/faq.html#why-should-test-suite-names-and-test-names-not-contain-underscore +TEST(TestSuiteName, TestName) {} +TEST(TestSuiteName, DISABLED_TestName) {} +TEST(DISABLED_TestSuiteName, TestName) {} +TEST(DISABLED_TestSuiteName, DISABLED_TestName) {} + +TEST_F(TestSuiteFixtureName, TestName) {} +TEST_F(TestSuiteFixtureName, DISABLED_TestName) {} +TEST_F(DISABLED_TestSuiteFixtureName, TestName) {} +TEST_F(DISABLED_TestSuiteFixtureName, DISABLED_TestName) {} + +TEST_P(ParameterizedTestSuiteFixtureName, TestName) {} +TEST_P(ParameterizedTestSuiteFixtureName, DISABLED_TestName) {} +TEST_P(DISABLED_ParameterizedTestSuiteFixtureName, TestName) {} +TEST_P(DISABLED_ParameterizedTestSuiteFixtureName, DISABLED_TestName) {} + +TYPED_TEST(TypedTestSuiteName, TestName) {} +TYPED_TEST(TypedTestSuiteName, DISABLED_TestName) {} +TYPED_TEST(DISABLED_TypedTestSuiteName, TestName) {} +TYPED_TEST(DISABLED_TypedTestSuiteName, DISABLED_TestName) {} + +TYPED_TEST_P(TypeParameterizedTestSuiteName, TestName) {} +TYPED_TEST_P(TypeParameterizedTestSuiteName, DISABLED_TestName) {} +TYPED_TEST_P(DISABLED_TypeParameterizedTestSuiteName, TestName) {} +TYPED_TEST_P(DISABLED_TypeParameterizedTestSuiteName, DISABLED_TestName) {} + +FRIEND_TEST(FriendTestSuite, Is_NotChecked) {} +FRIEND_TEST(Friend_TestSuite, IsNotChecked) {} +FRIEND_TEST(Friend_TestSuite, Is_NotChecked) {} diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-underscore-in-googletest-name.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-underscore-in-googletest-name.cpp deleted file mode 100644 index 6e8a5c2d50af9..0000000000000 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-underscore-in-googletest-name.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// 
RUN: %check_clang_tidy %s google-readability-avoid-underscore-in-googletest-name %t - -#define TEST(test_case_name, test_name) void test_case_name##test_name() -#define TEST_F(test_case_name, test_name) void test_case_name##test_name() -#define TEST_P(test_case_name, test_name) void test_case_name##test_name() -#define TYPED_TEST(test_case_name, test_name) void test_case_name##test_name() -#define TYPED_TEST_P(test_case_name, test_name) void test_case_name##test_name() -#define FRIEND_TEST(test_case_name, test_name) void test_case_name##test_name() - -TEST(TestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST(TestCaseName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(TestCaseName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(Illegal_TestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test case name "Illegal_TestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(Illegal_Test_CaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test case name "Illegal_Test_CaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(Illegal_TestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test case name "Illegal_TestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:28: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_F(TestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_F(TestCaseFixtureName, DISABLED_Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_F(TestCaseFixtureName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_F(Illegal_TestCaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_TestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_F(Illegal_TestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_TestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:37: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_F(Illegal_Test_CaseFixtureName, TestName) {} -// CHECK-MESSAGES: 
:[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_Test_CaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_P(ParameterizedTestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:42: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_P(ParameterizedTestCaseFixtureName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:42: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_P(ParameterizedTestCaseFixtureName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:42: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_P(Illegal_ParameterizedTestCaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_ParameterizedTestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_P(Illegal_ParameterizedTestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_ParameterizedTestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:50: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_P(Illegal_Parameterized_TestCaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_Parameterized_TestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST(TypedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST(TypedTestCaseName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST(TypedTestCaseName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST(Illegal_TypedTestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test case name "Illegal_TypedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST(Illegal_TypedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test case name "Illegal_TypedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:39: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST(Illegal_Typed_TestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test case name "Illegal_Typed_TestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - 
-TYPED_TEST_P(TypeParameterizedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:45: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST_P(TypeParameterizedTestCaseName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:45: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST_P(TypeParameterizedTestCaseName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:45: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST_P(Illegal_TypeParameterizedTestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_TypeParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST_P(Illegal_TypeParameterizedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_TypeParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:53: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST_P(Illegal_Type_ParameterizedTestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_Type_ParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -// Underscores are allowed to disable a test with the DISABLED_ prefix. -// https://github.com/google/googletest/blob/master/googletest/docs/faq.md#why-should-test-suite-names-and-test-names-not-contain-underscore -TEST(TestCaseName, TestName) {} -TEST(TestCaseName, DISABLED_TestName) {} - -TEST_F(TestCaseFixtureName, TestName) {} -TEST_F(TestCaseFixtureName, DISABLED_TestName) {} - -TEST_P(ParameterizedTestCaseFixtureName, TestName) {} -TEST_P(ParameterizedTestCaseFixtureName, DISABLED_TestName) {} - -TYPED_TEST(TypedTestName, TestName) {} -TYPED_TEST(TypedTestName, DISABLED_TestName) {} - -TYPED_TEST_P(TypeParameterizedTestName, TestName) {} -TYPED_TEST_P(TypeParameterizedTestName, DISABLED_TestName) {} - -FRIEND_TEST(FriendTest, Is_NotChecked) {} -FRIEND_TEST(Friend_Test, IsNotChecked) {} -FRIEND_TEST(Friend_Test, Is_NotChecked) {} diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index c874d8cacd197..037cb67e82189 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -204,7 +204,7 @@ if(FUCHSIA_SDK) set(BUILTINS_${target}_CMAKE_SYSROOT ${FUCHSIA_${target}_SYSROOT} CACHE PATH "") endforeach() - foreach(target x86_64-unknown-fuchsia;aarch64-unknown-fuchsia) + foreach(target x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia) # Set the per-target runtimes options. 
list(APPEND RUNTIME_TARGETS "${target}") set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "") @@ -276,12 +276,12 @@ if(FUCHSIA_SDK) set(LLVM_RUNTIME_MULTILIBS "asan;noexcept;compat;asan+noexcept;hwasan;hwasan+noexcept" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_compat_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_hwasan_TARGETS "aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_hwasan+noexcept_TARGETS "aarch64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_compat_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_hwasan_TARGETS "aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_hwasan+noexcept_TARGETS "aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") endif() set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "") diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index f8c83d4d6d162..a9bdc83c53e7a 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2358,6 +2358,46 @@ evaluated, so any side effects of the expression will be discarded. Query for this feature with ``__has_builtin(__builtin_assume)``. +.. _langext-__builtin_assume_separate_storage: + +``__builtin_assume_separate_storage`` +-------------------- + +``__builtin_assume_separate_storage`` is used to provide the optimizer with the +knowledge that its two arguments point to separately allocated objects. + +**Syntax**: + +.. code-block:: c++ + + __builtin_assume_separate_storage(const volatile void *, const volatile void *) + +**Example of Use**: + +.. code-block:: c++ + + int foo(int *x, int *y) { + __builtin_assume_separate_storage(x, y); + *x = 0; + *y = 1; + // The optimizer may optimize this to return 0 without reloading from *x. + return *x; + } + +**Description**: + +The arguments to this function are assumed to point into separately allocated +storage (either different variable definitions or different dynamic storage +allocations). The optimizer may use this fact to aid in alias analysis. If the +arguments point into the same storage, the behavior is undefined. Note that the +definition of "storage" here refers to the outermost enclosing allocation of any +particular object (so for example, it's never correct to call this function +passing the addresses of fields in the same struct, elements of the same array, +etc.). + +Query for this feature with ``__has_builtin(__builtin_assume_separate_storage)``. 
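A minimal sketch of feature-guarded use, following the ``__has_builtin`` query mentioned above; the wrapper macro name is illustrative rather than part of the patch:

.. code-block:: c++

  #if __has_builtin(__builtin_assume_separate_storage)
  #define ASSUME_SEPARATE_STORAGE(a, b) __builtin_assume_separate_storage((a), (b))
  #else
  #define ASSUME_SEPARATE_STORAGE(a, b) ((void)0) // no-op where the builtin is unavailable
  #endif

  void scale(float *dst, const float *src, float factor, int n) {
    ASSUME_SEPARATE_STORAGE(dst, src); // dst and src are separately allocated buffers
    for (int i = 0; i < n; ++i)
      dst[i] = factor * src[i];
  }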
+ + ``__builtin_offsetof`` ---------------------- diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 005bf99a62457..29e3f516c06e5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -124,6 +124,8 @@ Non-comprehensive list of changes in this release - Clang now supports ``__builtin_FILE_NAME()`` which returns the same information as the ``__FILE_NAME__`` macro (the presumed file name from the invocation point, with no path components included). +- Clang now supports ``__builtin_assume_separate_storage`` that indicates that + its arguments point to objects in separate storage allocations. New Compiler Flags ------------------ @@ -225,6 +227,8 @@ Bug Fixes in This Version enabling short-circuiting coroutines use cases. This fixes (`#56532 `_) in antecipation of `CWG2563 _`. +- Fix highlighting issue with ``_Complex`` and initialization list with more than + 2 items. (`#61518 `_) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 89f7c1ed2ba24..a8485682c1d1f 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -32,6 +32,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) FIXABLE_GADGET(ULCArraySubscript) FIXABLE_GADGET(DerefSimplePtrArithFixable) +FIXABLE_GADGET(PointerDereference) #undef FIXABLE_GADGET #undef WARNING_GADGET diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h index e641468f77d00..3495bdfc538cb 100644 --- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -18,6 +18,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Error.h" #include @@ -47,18 +48,26 @@ class ControlFlowContext { return StmtToBlock; } + /// Returns whether `B` is reachable from the entry block. + bool isBlockReachable(const CFGBlock &B) const { + return BlockReachable[B.getBlockID()]; + } + private: // FIXME: Once the deprecated `build` method is removed, mark `D` as "must not // be null" and add an assertion. ControlFlowContext(const Decl *D, std::unique_ptr Cfg, - llvm::DenseMap StmtToBlock) + llvm::DenseMap StmtToBlock, + llvm::BitVector BlockReachable) : ContainingDecl(D), Cfg(std::move(Cfg)), - StmtToBlock(std::move(StmtToBlock)) {} + StmtToBlock(std::move(StmtToBlock)), + BlockReachable(std::move(BlockReachable)) {} /// The `Decl` containing the statement used to construct the CFG. const Decl *ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; + llvm::BitVector BlockReachable; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index 702aaff9c7e71..a044f477ce1b5 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -34,6 +34,7 @@ namespace clang { namespace dataflow { +class Logger; /// Skip past nodes that the CFG does not emit. 
These nodes are invisible to /// flow-sensitive analysis, and should be ignored as they will effectively not @@ -67,6 +68,11 @@ class DataflowAnalysisContext { /// fundamentally limited: some constructs, such as recursion, are /// explicitly unsupported. std::optional ContextSensitiveOpts; + + /// If provided, analysis details will be recorded here. + /// (This is always non-null within an AnalysisContext, the framework + /// provides a fallback no-op logger). + Logger *Log = nullptr; }; /// Constructs a dataflow analysis context. @@ -76,11 +82,9 @@ class DataflowAnalysisContext { /// `S` must not be null. DataflowAnalysisContext(std::unique_ptr S, Options Opts = Options{ - /*ContextSensitiveOpts=*/std::nullopt}) - : S(std::move(S)), TrueVal(createAtomicBoolValue()), - FalseVal(createAtomicBoolValue()), Opts(Opts) { - assert(this->S != nullptr); - } + /*ContextSensitiveOpts=*/std::nullopt, + /*Logger=*/nullptr}); + ~DataflowAnalysisContext(); /// Takes ownership of `Loc` and returns a reference to it. /// @@ -393,6 +397,8 @@ class DataflowAnalysisContext { // Fields modeled by environments covered by this context. llvm::DenseSet ModeledFields; + + std::unique_ptr LogOwner; // If created via flags. }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index e457430a5e646..678e5b871cc83 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -22,6 +22,7 @@ #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" @@ -177,10 +178,12 @@ class Environment { /// with a symbolic representation of the `this` pointee. Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx); - const DataflowAnalysisContext::Options &getAnalysisOptions() { + const DataflowAnalysisContext::Options &getAnalysisOptions() const { return DACtx->getOptions(); } + Logger &logger() const { return *DACtx->getOptions().Log; } + /// Creates and returns an environment to use for an inline analysis of the /// callee. Uses the storage location from each argument in the `Call` as the /// storage location for the corresponding parameter in the callee. diff --git a/clang/include/clang/Analysis/FlowSensitive/Logger.h b/clang/include/clang/Analysis/FlowSensitive/Logger.h new file mode 100644 index 0000000000000..903dfbc30d40d --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/Logger.h @@ -0,0 +1,85 @@ +//===-- Logger.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H + +#include "clang/Analysis/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace clang::dataflow { +// Forward declarations so we can use Logger anywhere in the framework. 
+class ControlFlowContext; +class TypeErasedDataflowAnalysis; +struct TypeErasedDataflowAnalysisState; + +/// A logger is notified as the analysis progresses. +/// It can produce a report of the analysis's findings and how it came to them. +/// +/// The framework reports key structural events (e.g. traversal of blocks). +/// The specific analysis can add extra details to be presented in context. +class Logger { +public: + /// Returns a dummy logger that does nothing. + static Logger &null(); + /// A logger that simply writes messages to the specified ostream in real + /// time. + static std::unique_ptr textual(llvm::raw_ostream &); + + virtual ~Logger() = default; + + /// Called by the framework as we start analyzing a new function or statement. + /// Forms a pair with endAnalysis(). + virtual void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) {} + virtual void endAnalysis() {} + + // At any time during the analysis, we're computing the state for some target + // program point. + + /// Called when we start (re-)processing a block in the CFG. + /// The target program point is the entry to the specified block. + /// Calls to log() describe transferBranch(), join() etc. + virtual void enterBlock(const CFGBlock &) {} + /// Called when we start processing an element in the current CFG block. + /// The target program point is after the specified element. + /// Calls to log() describe the transfer() function. + virtual void enterElement(const CFGElement &) {} + + /// Records the analysis state computed for the current program point. + virtual void recordState(TypeErasedDataflowAnalysisState &) {} + /// Records that the analysis state for the current block is now final. + virtual void blockConverged() {} + + /// Called by the framework or user code to report some event. + /// The event is associated with the current context (program point). + /// The Emit function produces the log message. It may or may not be called, + /// depending on if the logger is interested; it should have no side effects. + void log(llvm::function_ref Emit) { + if (!ShouldLogText) + return; + std::string S; + llvm::raw_string_ostream OS(S); + Emit(OS); + logText(S); + } + +protected: + /// ShouldLogText should be false for trivial loggers that ignore logText(). + /// This allows log() to skip evaluating its Emit function. + Logger(bool ShouldLogText = true) : ShouldLogText(ShouldLogText) {} + +private: + bool ShouldLogText; + virtual void logText(llvm::StringRef) {} +}; + +} // namespace clang::dataflow + +#endif diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h index 78a426ed94dd5..db3d780bf35e5 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h +++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h @@ -26,9 +26,9 @@ class StmtToEnvMap { public: virtual ~StmtToEnvMap() = default; - /// Returns the environment of the basic block that contains `S` or nullptr if - /// there isn't one. - /// FIXME: Ensure that the result can't be null and return a const reference. + /// Retrieves the environment of the basic block that contains `S`. + /// If `S` is reachable, returns a non-null pointer to the environment. + /// If `S` is not reachable, returns nullptr. 
virtual const Environment *getEnvironment(const Stmt &S) const = 0; }; diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 957375eccb84a..dea806099efbf 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1591,6 +1591,7 @@ BUILTIN(__builtin_annotation, "v.", "tn") // Invariants BUILTIN(__builtin_assume, "vb", "nE") +BUILTIN(__builtin_assume_separate_storage, "vvCD*vCD*", "nE") // Multiprecision Arithmetic Builtins. BUILTIN(__builtin_addcb, "UcUcCUcCUcCUc*", "n") diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 387ce4d6e9b17..c0c99eb8b6d62 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -103,16 +103,22 @@ class alignas(8) Module { /// The location of the module definition. SourceLocation DefinitionLoc; + // FIXME: Consider if reducing the size of this enum (having Partition and + // Named modules only) then representing interface/implementation separately + // is more efficient. enum ModuleKind { /// This is a module that was defined by a module map and built out /// of header files. ModuleMapModule, + /// This is a C++ 20 header unit. + ModuleHeaderUnit, + /// This is a C++20 module interface unit. ModuleInterfaceUnit, - /// This is a C++ 20 header unit. - ModuleHeaderUnit, + /// This is a C++20 module implementation unit. + ModuleImplementationUnit, /// This is a C++ 20 module partition interface. ModulePartitionInterface, @@ -169,9 +175,16 @@ class alignas(8) Module { /// Does this Module scope describe part of the purview of a standard named /// C++ module? bool isModulePurview() const { - return Kind == ModuleInterfaceUnit || Kind == ModulePartitionInterface || - Kind == ModulePartitionImplementation || - Kind == PrivateModuleFragment; + switch (Kind) { + case ModuleInterfaceUnit: + case ModuleImplementationUnit: + case ModulePartitionInterface: + case ModulePartitionImplementation: + case PrivateModuleFragment: + return true; + default: + return false; + } } /// Does this Module scope describe a fragment of the global module within @@ -561,6 +574,11 @@ class alignas(8) Module { Kind == ModulePartitionImplementation; } + /// Is this a module implementation. + bool isModuleImplementation() const { + return Kind == ModuleImplementationUnit; + } + /// Is this module a header unit. bool isHeaderUnit() const { return Kind == ModuleHeaderUnit; } // Is this a C++20 module interface or a partition. diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index feb12f3638d34..9d6f1d23696b8 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -36,14 +36,13 @@ class Multilib { std::string OSSuffix; std::string IncludeSuffix; flags_list Flags; - int Priority; public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, int Priority = 0, + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list()); /// Get the detected GCC installation path suffix for the multi-arch @@ -62,10 +61,6 @@ class Multilib { /// All elements begin with either '+' or '-' const flags_list &flags() const { return Flags; } - /// Returns the multilib priority. 
When more than one multilib matches flags, - /// the one with the highest priority is selected, with 0 being the default. - int priority() const { return Priority; } - LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; @@ -108,6 +103,9 @@ class MultilibSet { const_iterator begin() const { return Multilibs.begin(); } const_iterator end() const { return Multilibs.end(); } + /// Select compatible variants + multilib_list select(const Multilib::flags_list &Flags) const; + /// Pick the best multilib in the set, \returns false if none are compatible bool select(const Multilib::flags_list &Flags, Multilib &M) const; @@ -129,13 +127,6 @@ class MultilibSet { } const IncludeDirsFunc &filePathsCallback() const { return FilePathsCallback; } - -private: - /// Apply the filter to Multilibs and return the subset that remains - static multilib_list filterCopy(FilterCallback F, const multilib_list &Ms); - - /// Apply the filter to the multilib_list, removing those that don't match - static void filterInPlace(FilterCallback F, multilib_list &Ms); }; raw_ostream &operator<<(raw_ostream &OS, const MultilibSet &MS); diff --git a/clang/include/clang/Driver/MultilibBuilder.h b/clang/include/clang/Driver/MultilibBuilder.h index cf84c456152b1..f4875f2e03f8a 100644 --- a/clang/include/clang/Driver/MultilibBuilder.h +++ b/clang/include/clang/Driver/MultilibBuilder.h @@ -28,11 +28,10 @@ class MultilibBuilder { std::string OSSuffix; std::string IncludeSuffix; flags_list Flags; - int Priority; public: MultilibBuilder(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, int Priority = 0); + StringRef IncludeSuffix); /// Initializes GCCSuffix, OSSuffix & IncludeSuffix to the same value. MultilibBuilder(StringRef Suffix = {}); @@ -75,10 +74,6 @@ class MultilibBuilder { const flags_list &flags() const { return Flags; } flags_list &flags() { return Flags; } - /// Returns the multilib priority. When more than one multilib matches flags, - /// the one with the highest priority is selected, with 0 being the default. - int priority() const { return Priority; } - /// Add a flag to the flags list /// \p Flag must be a flag accepted by the driver with its leading '-' /// removed, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b50dfd6f35510..821e86c0260f3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2638,7 +2638,11 @@ defm objc_avoid_heapify_local_blocks : BoolFOption<"objc-avoid-heapify-local-blo NegFlag, BothFlags<[CC1Option, NoDriverOption], " to avoid heapifying local blocks">>; -def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group; +def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group, + HelpText<"Omit the frame pointer from functions that don't need it. " + "Some stack unwinding cases, such as profilers and sanitizers, may prefer specifying -fno-omit-frame-pointer. " + "On many targets, -O1 and higher omit the frame pointer by default. 
" + "-m[no-]omit-leaf-frame-pointer takes precedence for leaf functions">; def fopenmp : Flag<["-"], "fopenmp">, Group, Flags<[CC1Option, NoArgumentUnused, FlangOption, FC1Option]>, HelpText<"Parse OpenMP pragmas and generate parallel code.">; def fno_openmp : Flag<["-"], "fno-openmp">, Group, Flags<[NoArgumentUnused]>; diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index a0ddd13c11bfd..f155c609b06cb 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -560,6 +560,11 @@ class ModuleMap { Module *createPrivateModuleFragmentForInterfaceUnit(Module *Parent, SourceLocation Loc); + /// Create a new C++ module with the specified kind, and reparent any pending + /// global module fragment(s) to it. + Module *createModuleUnitWithKind(SourceLocation Loc, StringRef Name, + Module::ModuleKind Kind); + /// Create a new module for a C++ module interface unit. /// The module must not already exist, and will be configured for the current /// compilation. @@ -569,6 +574,13 @@ class ModuleMap { /// \returns The newly-created module. Module *createModuleForInterfaceUnit(SourceLocation Loc, StringRef Name); + /// Create a new module for a C++ module implementation unit. + /// The interface module for this implementation (implicitly imported) must + /// exist and be loaded and present in the modules map. + /// + /// \returns The newly-created module. + Module *createModuleForImplementationUnit(SourceLocation Loc, StringRef Name); + /// Create a C++20 header unit. Module *createHeaderUnit(SourceLocation Loc, StringRef Name, Module::Header H); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 63ee0f0ed7fb6..277c02ee3f1bd 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2274,6 +2274,10 @@ class Sema final { }; /// The modules we're currently parsing. llvm::SmallVector ModuleScopes; + + /// For an interface unit, this is the implicitly imported interface unit. + clang::Module *ThePrimaryInterface = nullptr; + /// The explicit global module fragment of the current translation unit. /// The explicit Global Module Fragment, as specified in C++ /// [module.global.frag]. diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 09ee1744e8945..d31fa38b93825 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -514,7 +514,6 @@ class ASTWriter : public ASTDeserializationListener, void WriteTypeAbbrevs(); void WriteType(QualType T); - bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC); bool isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC); void GenerateNameLookupTable(const DeclContext *DC, diff --git a/clang/include/clang/Testing/TestAST.h b/clang/include/clang/Testing/TestAST.h index 7ba2ca882b91c..845e31f65438b 100644 --- a/clang/include/clang/Testing/TestAST.h +++ b/clang/include/clang/Testing/TestAST.h @@ -49,6 +49,9 @@ struct TestInputs { /// Keys are plain filenames ("foo.h"), values are file content. llvm::StringMap ExtraFiles = {}; + /// Filename to use for translation unit. A default will be used when empty. + std::string FileName; + /// By default, error diagnostics during parsing are reported as gtest errors. /// To suppress this, set ErrorOK or include "error-ok" in a comment in Code. /// In either case, all diagnostics appear in TestAST::diagnostics(). 
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 56042e5fd252f..cd786049f914e 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1600,6 +1600,7 @@ Module *Decl::getOwningModuleForLinkage(bool IgnoreLinkage) const { return nullptr; case Module::ModuleInterfaceUnit: + case Module::ModuleImplementationUnit: case Module::ModulePartitionInterface: case Module::ModulePartitionImplementation: return M; diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt index 1a49998c39c20..a3216518f4dba 100644 --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -2,6 +2,7 @@ add_clang_library(clangAnalysisFlowSensitive ControlFlowContext.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp + Logger.cpp Transfer.cpp TypeErasedDataflowAnalysis.cpp Value.cpp diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 2492b5203724c..6699a0fc9d79e 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -16,6 +16,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Error.h" #include @@ -44,6 +45,28 @@ buildStmtToBasicBlockMap(const CFG &Cfg) { return StmtToBlock; } +static llvm::BitVector findReachableBlocks(const CFG &Cfg) { + llvm::BitVector BlockReachable(Cfg.getNumBlockIDs(), false); + + llvm::SmallVector BlocksToVisit; + BlocksToVisit.push_back(&Cfg.getEntry()); + while (!BlocksToVisit.empty()) { + const CFGBlock *Block = BlocksToVisit.back(); + BlocksToVisit.pop_back(); + + if (BlockReachable[Block->getBlockID()]) + continue; + + BlockReachable[Block->getBlockID()] = true; + + for (const CFGBlock *Succ : Block->succs()) + if (Succ) + BlocksToVisit.push_back(Succ); + } + + return BlockReachable; +} + llvm::Expected ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { CFG::BuildOptions Options; @@ -64,7 +87,11 @@ ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { llvm::DenseMap StmtToBlock = buildStmtToBasicBlockMap(*Cfg); - return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock)); + + llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + + return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock), + std::move(BlockReachable)); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index a1b813982502b..57169baccbd4a 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -15,13 +15,20 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include #include #include +static llvm::cl::opt + DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. 
With no arg, " + "writes textual log to stderr.")); + namespace clang { namespace dataflow { @@ -375,6 +382,27 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { return nullptr; } +DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr S, + Options Opts) + : S(std::move(S)), TrueVal(createAtomicBoolValue()), + FalseVal(createAtomicBoolValue()), Opts(Opts) { + assert(this->S != nullptr); + // If the -dataflow-log command-line flag was set, synthesize a logger. + // This is ugly but provides a uniform method for ad-hoc debugging dataflow- + // based tools. + if (Opts.Log == nullptr) { + if (DataflowLog.getNumOccurrences() > 0) { + LogOwner = Logger::textual(llvm::errs()); + this->Opts.Log = LogOwner.get(); + // FIXME: if the flag is given a value, write an HTML log to a file. + } else { + this->Opts.Log = &Logger::null(); + } + } +} + +DataflowAnalysisContext::~DataflowAnalysisContext() = default; + } // namespace dataflow } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/Logger.cpp b/clang/lib/Analysis/FlowSensitive/Logger.cpp new file mode 100644 index 0000000000000..469fea338e451 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/Logger.cpp @@ -0,0 +1,108 @@ +//===-- Logger.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/FlowSensitive/Logger.h" +#include "clang/Analysis/FlowSensitive/ControlFlowContext.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/Support/WithColor.h" + +namespace clang::dataflow { + +Logger &Logger::null() { + struct NullLogger final : Logger {}; + static auto *Instance = new NullLogger(); + return *Instance; +} + +namespace { +struct TextualLogger final : Logger { + llvm::raw_ostream &OS; + const CFG *CurrentCFG; + const CFGBlock *CurrentBlock; + const CFGElement *CurrentElement; + unsigned CurrentElementIndex; + bool ShowColors; + llvm::DenseMap VisitCount; + TypeErasedDataflowAnalysis *CurrentAnalysis; + + TextualLogger(llvm::raw_ostream &OS) + : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {} + + virtual void beginAnalysis(const ControlFlowContext &CFG, + TypeErasedDataflowAnalysis &Analysis) override { + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Beginning data flow analysis ===\n"; + } + if (auto *D = CFG.getDecl()) { + D->print(OS); + OS << "\n"; + D->dump(OS); + } + CurrentCFG = &CFG.getCFG(); + CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors); + CurrentAnalysis = &Analysis; + } + virtual void endAnalysis() override { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + unsigned Blocks = 0, Steps = 0; + for (const auto &E : VisitCount) { + ++Blocks; + Steps += E.second; + } + llvm::errs() << "=== Finished analysis: " << Blocks << " blocks in " + << Steps << " total steps ===\n"; + } + virtual void enterBlock(const CFGBlock &Block) override { + unsigned Count = ++VisitCount[&Block]; + { + llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true); + OS << "=== Entering block B" << Block.getBlockID() << " (iteration " + << Count << ") ===\n"; + } + Block.print(OS, CurrentCFG, 
CurrentAnalysis->getASTContext().getLangOpts(), + ShowColors); + CurrentBlock = &Block; + CurrentElement = nullptr; + CurrentElementIndex = 0; + } + virtual void enterElement(const CFGElement &Element) override { + ++CurrentElementIndex; + CurrentElement = &Element; + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Processing element B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ": "; + Element.dumpToStream(OS); + } + } + void recordState(TypeErasedDataflowAnalysisState &State) override { + { + llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN, + /*Bold=*/true); + OS << "Computed state for B" << CurrentBlock->getBlockID() << "." + << CurrentElementIndex << ":\n"; + } + // FIXME: currently the environment dump is verbose and unenlightening. + // FIXME: dump the user-defined lattice, too. + State.Env.dump(OS); + OS << "\n"; + } + void blockConverged() override { + OS << "B" << CurrentBlock->getBlockID() << " has converged!\n"; + } + virtual void logText(llvm::StringRef S) override { OS << S << "\n"; } +}; +} // namespace + +std::unique_ptr<Logger> Logger::textual(llvm::raw_ostream &OS) { + return std::make_unique<TextualLogger>(OS); +} + +} // namespace clang::dataflow diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index e427f1458a8db..a1ed37da54c28 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -162,15 +162,27 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> { } case BO_LAnd: case BO_LOr: { - BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS); - BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS); - auto &Loc = Env.createStorageLocation(*S); Env.setStorageLocation(*S, Loc); + + BoolValue *LHSVal = getLogicOperatorSubExprValue(*LHS); + // If the LHS was not reachable, this BinaryOperator would also not be + // reachable, and we would never get here. + assert(LHSVal != nullptr); + BoolValue *RHSVal = getLogicOperatorSubExprValue(*RHS); + if (RHSVal == nullptr) { + // If the RHS isn't reachable and we evaluate this BinaryOperator, + // then the value of the LHS must have triggered the short-circuit + // logic. This implies that the value of the entire expression must be + // equal to the value of the LHS. + Env.setValue(Loc, *LHSVal); + break; + } + if (S->getOpcode() == BO_LAnd) - Env.setValue(Loc, Env.makeAnd(LHSVal, RHSVal)); + Env.setValue(Loc, Env.makeAnd(*LHSVal, *RHSVal)); else - Env.setValue(Loc, Env.makeOr(LHSVal, RHSVal)); + Env.setValue(Loc, Env.makeOr(*LHSVal, *RHSVal)); break; } case BO_NE: @@ -779,15 +791,19 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> { } private: - BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) { + /// If `SubExpr` is reachable, returns a non-null pointer to the value for + /// `SubExpr`. If `SubExpr` is not reachable, returns nullptr. + BoolValue *getLogicOperatorSubExprValue(const Expr &SubExpr) { // `SubExpr` and its parent logic operator might be part of different basic // blocks. We try to access the value that is assigned to `SubExpr` in the // corresponding environment.
- if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) { - if (auto *Val = dyn_cast_or_null( - SubExprEnv->getValue(SubExpr, SkipPast::Reference))) - return *Val; - } + const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr); + if (!SubExprEnv) + return nullptr; + + if (auto *Val = dyn_cast_or_null( + SubExprEnv->getValue(SubExpr, SkipPast::Reference))) + return Val; if (Env.getStorageLocation(SubExpr, SkipPast::None) == nullptr) { // Sub-expressions that are logic operators are not added in basic blocks @@ -800,11 +816,11 @@ class TransferVisitor : public ConstStmtVisitor { if (auto *Val = dyn_cast_or_null( Env.getValue(SubExpr, SkipPast::Reference))) - return *Val; + return Val; // If the value of `SubExpr` is still unknown, we create a fresh symbolic // boolean value for it. - return Env.makeAtomicBoolValue(); + return &Env.makeAtomicBoolValue(); } // If context sensitivity is enabled, try to analyze the body of the callee diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index fe00d765b6bef..08bcd5e65e379 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -51,6 +51,8 @@ class StmtToEnvMapImpl : public StmtToEnvMap { const Environment *getEnvironment(const Stmt &S) const override { auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); assert(BlockIt != CFCtx.getStmtToBlock().end()); + if (!CFCtx.isBlockReachable(*BlockIt->getSecond())) + return nullptr; const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; assert(State); return &State->Env; @@ -189,7 +191,10 @@ struct AnalysisContext { llvm::ArrayRef> BlockStates) : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv), - BlockStates(BlockStates) {} + Log(InitEnv.logger()), BlockStates(BlockStates) { + Log.beginAnalysis(CFCtx, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } /// Contains the CFG being analyzed. const ControlFlowContext &CFCtx; @@ -197,6 +202,7 @@ struct AnalysisContext { TypeErasedDataflowAnalysis &Analysis; /// Initial state to start the analysis. const Environment &InitEnv; + Logger &Log; /// Stores the state of a CFG block if it has been evaluated by the analysis. /// The indices correspond to the block IDs. llvm::ArrayRef> BlockStates; @@ -366,8 +372,11 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, std::function PostVisitCFG = nullptr) { + AC.Log.enterBlock(Block); auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); for (const auto &Element : Block) { + AC.Log.enterElement(Element); // Built-in analysis if (AC.Analysis.builtinOptions()) { builtinTransfer(Element, State, AC); @@ -380,6 +389,7 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, if (PostVisitCFG) { PostVisitCFG(Element, State); } + AC.Log.recordState(State); } return State; } @@ -460,15 +470,18 @@ runTypeErasedDataflowAnalysis( LatticeJoinEffect Effect2 = NewBlockState.Env.widen(OldBlockState->Env, Analysis); if (Effect1 == LatticeJoinEffect::Unchanged && - Effect2 == LatticeJoinEffect::Unchanged) + Effect2 == LatticeJoinEffect::Unchanged) { // The state of `Block` didn't change from widening so there's no need // to revisit its successors. 
+ AC.Log.blockConverged(); continue; + } } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; } } diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 04e11d0471a7d..4a8358af68ec5 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -463,6 +463,45 @@ class ULCArraySubscriptGadget : public FixableGadget { return {}; } }; + +class PointerDereferenceGadget : public FixableGadget { + static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; + static constexpr const char *const OperatorTag = "op"; + + const DeclRefExpr *BaseDeclRefExpr = nullptr; + const UnaryOperator *Op = nullptr; + +public: + PointerDereferenceGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::PointerDereference), + BaseDeclRefExpr( + Result.Nodes.getNodeAs(BaseDeclRefExprTag)), + Op(Result.Nodes.getNodeAs(OperatorTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PointerDereference; + } + + static Matcher matcher() { + auto Target = + unaryOperator( + hasOperatorName("*"), + has(expr(ignoringParenImpCasts( + declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag))))) + .bind(OperatorTag); + + return expr(isInUnspecifiedLvalueContext(Target)); + } + + DeclUseList getClaimedVarUseSites() const override { + return {BaseDeclRefExpr}; + } + + virtual const Stmt *getBaseStmt() const final { return Op; } + + virtual std::optional getFixits(const Strategy &S) const override; +}; + } // namespace namespace { @@ -914,6 +953,38 @@ DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const { return std::nullopt; // something wrong or unsupported, give up } +std::optional +PointerDereferenceGadget::getFixits(const Strategy &S) const { + const VarDecl *VD = cast(BaseDeclRefExpr->getDecl()); + switch (S.lookup(VD)) { + case Strategy::Kind::Span: { + ASTContext &Ctx = VD->getASTContext(); + SourceManager &SM = Ctx.getSourceManager(); + // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] + // Deletes the *operand + CharSourceRange derefRange = clang::CharSourceRange::getCharRange( + Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); + // Inserts the [0] + std::optional endOfOperand = + getEndCharLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts()); + if (endOfOperand) { + return FixItList{{FixItHint::CreateRemoval(derefRange), + FixItHint::CreateInsertion( + endOfOperand.value().getLocWithOffset(1), "[0]")}}; + } + } + [[fallthrough]]; + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("Strategy not implemented yet!"); + case Strategy::Kind::Wontfix: + llvm_unreachable("Invalid strategy!"); + } + + return std::nullopt; +} + // For a non-null initializer `Init` of `T *` type, this function returns // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it // to output stream. 
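To make the rewrite above concrete, a before/after sketch of the source-level effect of this fix-it, assuming the variable itself is migrated to ``std::span`` by the Span strategy; function names are invented for illustration:

  #include <span>

  // Before: a raw-pointer dereference of the kind the gadget matches.
  int readBefore(int *p) { return *p; }

  // After: the leading '*' is removed and "[0]" is appended after the operand,
  // so `*p` becomes `p[0]`; the parameter type change comes from the Span strategy.
  int readAfter(std::span<int> p) { return p[0]; }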
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index bacd93ee1c379..aca51b2b95b59 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -93,6 +93,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, default: llvm_unreachable("TargetPointerWidth must be 32 or 64"); } + + MaxAtomicInlineWidth = TargetPointerWidth; return; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6381d68c161c6..b3aea13878c1c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2856,6 +2856,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateCall(FnAssume, ArgValue); return RValue::get(nullptr); } + case Builtin::BI__builtin_assume_separate_storage: { + const Expr *Arg0 = E->getArg(0); + const Expr *Arg1 = E->getArg(1); + + Value *Value0 = EmitScalarExpr(Arg0); + Value *Value1 = EmitScalarExpr(Arg1); + + Value *Values[] = {Value0, Value1}; + OperandBundleDefT OBD("separate_storage", Values); + Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD}); + return RValue::get(nullptr); + } case Builtin::BI__arithmetic_fence: { // Create the builtin call if FastMath is selected, and the target // supports the builtin, otherwise just return the argument. diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 0d0b5707e605a..9d7284cd0e37d 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -880,9 +880,11 @@ CodeGenModule::EmitCXXGlobalInitFunc() { // Include the filename in the symbol name. Including "sub_" matches gcc // and makes sure these symbols appear lexicographically behind the symbols - // with priority emitted above. + // with priority emitted above. Module implementation units behave the same + // way as a non-modular TU with imports. 
llvm::Function *Fn; - if (CXX20ModuleInits && getContext().getNamedModuleForCodeGen()) { + if (CXX20ModuleInits && getContext().getNamedModuleForCodeGen() && + !getContext().getNamedModuleForCodeGen()->isModuleImplementation()) { SmallString<256> InitFnName; llvm::raw_svector_ostream Out(InitFnName); cast(getCXXABI().getMangleContext()) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5f21cfca66bb8..76d0b92796bc5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1054,7 +1054,7 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { + : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, hasRequiresUnifiedSharedMemory(), @@ -1062,7 +1062,6 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) // Initialize Types used in OpenMPIRBuilder from OMPKinds.def OMPBuilder.initialize(); OMPBuilder.setConfig(Config); - OffloadEntriesInfoManager.setConfig(Config); loadOffloadInfoMetadata(); } @@ -1852,7 +1851,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); SmallString<128> Buffer, Out; - OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { @@ -1900,7 +1899,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Out.clear(); auto CtorEntryInfo = EntryInfo; CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( CtorEntryInfo, Ctor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); } @@ -1949,7 +1948,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Out.clear(); auto DtorEntryInfo = EntryInfo; DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( DtorEntryInfo, Dtor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); } @@ -2942,7 +2941,7 @@ enum KmpTaskTFields { void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // If we are in simd mode or there are no entries, we don't need to do // anything. 
- if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) + if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty()) return; llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = @@ -2986,8 +2985,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { } }; - OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager, - ErrorReportFn); + OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn); } /// Loads all the offload entries information from the host IR @@ -3021,7 +3019,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; } - OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); + OMPBuilder.loadOffloadInfoMetadata(*ME.get()); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { @@ -6109,10 +6107,9 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); - OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo, - GenerateOutlinedFunction, DefaultValTeams, - DefaultValThreads, IsOffloadEntry, - OutlinedFn, OutlinedFnID); + OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, + DefaultValTeams, DefaultValThreads, + IsOffloadEntry, OutlinedFn, OutlinedFnID); if (OutlinedFn != nullptr) CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); @@ -10136,7 +10133,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. - if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo)) + if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) return; switch (E.getDirectiveKind()) { @@ -10387,10 +10384,12 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, } Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); // Temp solution to prevent optimizations of the internal variables. - if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { + if (CGM.getLangOpts().OpenMPIsDevice && + (!VD->isExternallyVisible() || + Linkage == llvm::GlobalValue::LinkOnceODRLinkage)) { // Do not create a "ref-variable" if the original is not also available // on the host. - if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) + if (!OMPBuilder.OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) return; std::string RefName = getName({VarName, "ref"}); if (!CGM.GetGlobalValue(RefName)) { @@ -10425,7 +10424,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, Linkage = llvm::GlobalValue::WeakAnyLinkage; } - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( VarName, Addr, VarSize, Flags, Linkage); } @@ -10560,9 +10559,8 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { // don't need to do anything. if (CGM.getLangOpts().OMPTargetTriples.empty() || CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || - (OffloadEntriesInfoManager.empty() && - !HasEmittedDeclareTargetRegion && - !HasEmittedTargetRegion)) + (OMPBuilder.OffloadInfoManager.empty() && + !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion)) return nullptr; // Create and register the function that handles the requires directives. 
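The CGOpenMPRuntime changes above and below all follow one pattern: the frontend's private OffloadEntriesInfoManager member is removed, and offload-entry bookkeeping goes through the manager owned by the shared OpenMPIRBuilder, which already receives the OpenMPIRBuilderConfig via setConfig. A minimal sketch of the resulting call shape, assuming a caller that already holds the builder and an error-report callback (the function and variable names here are illustrative, not taken from the patch):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

    // Sketch only: emit the offload entry table through the builder-owned
    // manager; no separate OffloadEntriesInfoManager is constructed or
    // configured by the frontend any more.
    static void emitOffloadEntries(
        llvm::OpenMPIRBuilder &OMPBuilder,
        llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy ErrorFn) {
      if (OMPBuilder.OffloadInfoManager.empty())
        return; // nothing was registered for this TU
      OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorFn);
    }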
@@ -10583,9 +10581,8 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { // passed to the runtime. This avoids the runtime from throwing an error // for mismatching requires clauses across compilation units that don't // contain at least 1 target region. - assert((HasEmittedTargetRegion || - HasEmittedDeclareTargetRegion || - !OffloadEntriesInfoManager.empty()) && + assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || + !OMPBuilder.OffloadInfoManager.empty()) && "Target or declare target region expected."); if (HasRequiresUnifiedSharedMemory) Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index e7c1a098c7689..c9678a16ce90b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -508,9 +508,6 @@ class CGOpenMPRuntime { /// kmp_int64 st; // stride /// }; QualType KmpDimTy; - /// Entity that registers the offloading constants that were emitted so - /// far. - llvm::OffloadEntriesInfoManager OffloadEntriesInfoManager; bool ShouldMarkAsGlobal = true; /// List of the emitted declarations. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index e8c5f04db49f4..4ac28ee17a50b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -863,7 +863,6 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) hasRequiresUnifiedSharedMemory(), CGM.getLangOpts().OpenMPOffloadMandatory); OMPBuilder.setConfig(Config); - OffloadEntriesInfoManager.setConfig(Config); if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP can only handle device code."); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 0e33e9632b3eb..bd1ee2a674abb 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -548,6 +548,8 @@ void CodeGenModule::Release() { GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; } + // Module implementations are initialized the same way as a regular TU that + // imports one or more modules. 
if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index d1ab0c7b114e9..06bab74898616 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -26,10 +26,9 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, int Priority, - const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags), Priority(Priority) { + Flags(Flags) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -84,56 +83,36 @@ raw_ostream &clang::driver::operator<<(raw_ostream &OS, const Multilib &M) { } MultilibSet &MultilibSet::FilterOut(FilterCallback F) { - filterInPlace(F, Multilibs); + llvm::erase_if(Multilibs, F); return *this; } void MultilibSet::push_back(const Multilib &M) { Multilibs.push_back(M); } -static bool isFlagEnabled(StringRef Flag) { - char Indicator = Flag.front(); - assert(Indicator == '+' || Indicator == '-'); - return Indicator == '+'; +MultilibSet::multilib_list +MultilibSet::select(const Multilib::flags_list &Flags) const { + llvm::StringSet<> FlagSet; + for (const auto &Flag : Flags) + FlagSet.insert(Flag); + + multilib_list Result; + llvm::copy_if(Multilibs, std::back_inserter(Result), + [&FlagSet](const Multilib &M) { + for (const std::string &F : M.flags()) + if (!FlagSet.contains(F)) + return false; + return true; + }); + return Result; } -bool MultilibSet::select(const Multilib::flags_list &Flags, Multilib &M) const { - llvm::StringMap FlagSet; - - // Stuff all of the flags into the FlagSet such that a true mappend indicates - // the flag was enabled, and a false mappend indicates the flag was disabled. - for (StringRef Flag : Flags) - FlagSet[Flag.substr(1)] = isFlagEnabled(Flag); - - multilib_list Filtered = filterCopy([&FlagSet](const Multilib &M) { - for (StringRef Flag : M.flags()) { - llvm::StringMap::const_iterator SI = FlagSet.find(Flag.substr(1)); - if (SI != FlagSet.end()) - if (SI->getValue() != isFlagEnabled(Flag)) - return true; - } - return false; - }, Multilibs); - - if (Filtered.empty()) +bool MultilibSet::select(const Multilib::flags_list &Flags, + Multilib &Selected) const { + multilib_list Result = select(Flags); + if (Result.empty()) return false; - if (Filtered.size() == 1) { - M = Filtered[0]; - return true; - } - - // Sort multilibs by priority and select the one with the highest priority. - llvm::sort(Filtered, [](const Multilib &a, const Multilib &b) -> bool { - return a.priority() > b.priority(); - }); - - if (Filtered[0].priority() > Filtered[1].priority()) { - M = Filtered[0]; - return true; - } - - // TODO: We should consider returning llvm::Error rather than aborting. 
- assert(false && "More than one multilib with the same priority"); - return false; + Selected = Result.back(); + return true; } LLVM_DUMP_METHOD void MultilibSet::dump() const { @@ -145,17 +124,6 @@ void MultilibSet::print(raw_ostream &OS) const { OS << M << "\n"; } -MultilibSet::multilib_list MultilibSet::filterCopy(FilterCallback F, - const multilib_list &Ms) { - multilib_list Copy(Ms); - filterInPlace(F, Copy); - return Copy; -} - -void MultilibSet::filterInPlace(FilterCallback F, multilib_list &Ms) { - llvm::erase_if(Ms, F); -} - raw_ostream &clang::driver::operator<<(raw_ostream &OS, const MultilibSet &MS) { MS.print(OS); return OS; diff --git a/clang/lib/Driver/MultilibBuilder.cpp b/clang/lib/Driver/MultilibBuilder.cpp index 83ebc31d8eb99..f6351ae4b5278 100644 --- a/clang/lib/Driver/MultilibBuilder.cpp +++ b/clang/lib/Driver/MultilibBuilder.cpp @@ -41,9 +41,8 @@ static void normalizePathSegment(std::string &Segment) { } } -MultilibBuilder::MultilibBuilder(StringRef GCC, StringRef OS, StringRef Include, - int Priority) - : GCCSuffix(GCC), OSSuffix(OS), IncludeSuffix(Include), Priority(Priority) { +MultilibBuilder::MultilibBuilder(StringRef GCC, StringRef OS, StringRef Include) + : GCCSuffix(GCC), OSSuffix(OS), IncludeSuffix(Include) { normalizePathSegment(GCCSuffix); normalizePathSegment(OSSuffix); normalizePathSegment(IncludeSuffix); @@ -87,7 +86,7 @@ bool MultilibBuilder::isValid() const { } Multilib MultilibBuilder::makeMultilib() const { - return Multilib(GCCSuffix, OSSuffix, IncludeSuffix, Priority, Flags); + return Multilib(GCCSuffix, OSSuffix, IncludeSuffix, Flags); } MultilibSetBuilder &MultilibSetBuilder::Maybe(const MultilibBuilder &M) { diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp index 7da00a8854006..f9f14c01b2b9f 100644 --- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp @@ -39,12 +39,6 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple, DefMips64CPU = "mips64r6"; } - // MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android). - if (Triple.isAndroid()) { - DefMips32CPU = "mips32"; - DefMips64CPU = "mips64r6"; - } - // MIPS3 is the default for mips64*-unknown-openbsd. if (Triple.isOSOpenBSD()) DefMips64CPU = "mips3"; diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 3a3f7043a795f..b8bb000391b91 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -263,33 +263,33 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, Multilibs.push_back(Multilib()); // Use the noexcept variant with -fno-exceptions to avoid the extra overhead. - Multilibs.push_back(MultilibBuilder("noexcept", {}, {}, 1) + Multilibs.push_back(MultilibBuilder("noexcept", {}, {}) .flag("-fexceptions") .flag("+fno-exceptions") .makeMultilib()); // ASan has higher priority because we always want the instrumentated version. - Multilibs.push_back(MultilibBuilder("asan", {}, {}, 2) + Multilibs.push_back(MultilibBuilder("asan", {}, {}) .flag("+fsanitize=address") .makeMultilib()); // Use the asan+noexcept variant with ASan and -fno-exceptions. - Multilibs.push_back(MultilibBuilder("asan+noexcept", {}, {}, 3) + Multilibs.push_back(MultilibBuilder("asan+noexcept", {}, {}) .flag("+fsanitize=address") .flag("-fexceptions") .flag("+fno-exceptions") .makeMultilib()); // HWASan has higher priority because we always want the instrumentated // version. 
- Multilibs.push_back(MultilibBuilder("hwasan", {}, {}, 4) + Multilibs.push_back(MultilibBuilder("hwasan", {}, {}) .flag("+fsanitize=hwaddress") .makeMultilib()); // Use the hwasan+noexcept variant with HWASan and -fno-exceptions. - Multilibs.push_back(MultilibBuilder("hwasan+noexcept", {}, {}, 5) + Multilibs.push_back(MultilibBuilder("hwasan+noexcept", {}, {}) .flag("+fsanitize=hwaddress") .flag("-fexceptions") .flag("+fno-exceptions") .makeMultilib()); // Use Itanium C++ ABI for the compat multilib. - Multilibs.push_back(MultilibBuilder("compat", {}, {}, 6) + Multilibs.push_back(MultilibBuilder("compat", {}, {}) .flag("+fc++-abi=itanium") .makeMultilib()); @@ -299,9 +299,10 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, }); Multilib::flags_list Flags; - addMultilibFlag( - Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, true), - "fexceptions", Flags); + bool Exceptions = + Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, true); + addMultilibFlag(Exceptions, "fexceptions", Flags); + addMultilibFlag(!Exceptions, "fno-exceptions", Flags); addMultilibFlag(getSanitizerArgs(Args).needsAsanRt(), "fsanitize=address", Flags); addMultilibFlag(getSanitizerArgs(Args).needsHwasanRt(), "fsanitize=hwaddress", diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 7e72a1d1433da..0c8868109f7ee 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2431,9 +2431,6 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( static const char *const AArch64AndroidTriples[] = { "aarch64-linux-android"}; static const char *const ARMAndroidTriples[] = {"arm-linux-androideabi"}; - static const char *const MIPSELAndroidTriples[] = {"mipsel-linux-android"}; - static const char *const MIPS64ELAndroidTriples[] = { - "mips64el-linux-android"}; static const char *const X86AndroidTriples[] = {"i686-linux-android"}; static const char *const X86_64AndroidTriples[] = {"x86_64-linux-android"}; @@ -2448,22 +2445,6 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs)); TripleAliases.append(begin(ARMAndroidTriples), end(ARMAndroidTriples)); break; - case llvm::Triple::mipsel: - LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs)); - TripleAliases.append(begin(MIPSELAndroidTriples), - end(MIPSELAndroidTriples)); - BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs)); - BiarchTripleAliases.append(begin(MIPS64ELAndroidTriples), - end(MIPS64ELAndroidTriples)); - break; - case llvm::Triple::mips64el: - LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs)); - TripleAliases.append(begin(MIPS64ELAndroidTriples), - end(MIPS64ELAndroidTriples)); - BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs)); - BiarchTripleAliases.append(begin(MIPSELAndroidTriples), - end(MIPSELAndroidTriples)); - break; case llvm::Triple::x86_64: LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); TripleAliases.append(begin(X86_64AndroidTriples), diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 848d7247c20c0..77ad9605addab 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -92,8 +92,6 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::mips: return IsMipsR6 ? "mipsisa32r6-linux-gnu" : "mips-linux-gnu"; case llvm::Triple::mipsel: - if (IsAndroid) - return "mipsel-linux-android"; return IsMipsR6 ? 
"mipsisa32r6el-linux-gnu" : "mipsel-linux-gnu"; case llvm::Triple::mips64: { std::string MT = std::string(IsMipsR6 ? "mipsisa64r6" : "mips64") + @@ -105,8 +103,6 @@ std::string Linux::getMultiarchTriple(const Driver &D, break; } case llvm::Triple::mips64el: { - if (IsAndroid) - return "mips64el-linux-android"; std::string MT = std::string(IsMipsR6 ? "mipsisa64r6el" : "mips64el") + "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64"); if (D.getVFS().exists(concat(SysRoot, "/lib", MT))) @@ -126,6 +122,8 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::ppc64le: return "powerpc64le-linux-gnu"; case llvm::Triple::riscv64: + if (IsAndroid) + return "riscv64-linux-android"; return "riscv64-linux-gnu"; case llvm::Triple::sparc: return "sparc-linux-gnu"; diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 71a4ccd042ac8..bd0409d282084 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -39,14 +39,16 @@ static bool findOHOSMuslMultilibs(const Multilib::flags_list &Flags, // -mcpu=cortex-a7 // -mfloat-abi=soft -mfloat-abi=softfp -mfloat-abi=hard // -mfpu=neon-vfpv4 - Multilibs.push_back(Multilib("/a7_soft", {}, {}, 1, - {"+mcpu=cortex-a7", "+mfloat-abi=soft"})); + Multilibs.push_back( + Multilib("/a7_soft", {}, {}, {"+mcpu=cortex-a7", "+mfloat-abi=soft"})); - Multilibs.push_back(Multilib("/a7_softfp_neon-vfpv4", {}, {}, 1, - {"+mcpu=cortex-a7", "+mfloat-abi=softfp", "+mfpu=neon-vfpv4"})); + Multilibs.push_back( + Multilib("/a7_softfp_neon-vfpv4", {}, {}, + {"+mcpu=cortex-a7", "+mfloat-abi=softfp", "+mfpu=neon-vfpv4"})); - Multilibs.push_back(Multilib("/a7_hard_neon-vfpv4", {}, {}, 1, - {"+mcpu=cortex-a7", "+mfloat-abi=hard", "+mfpu=neon-vfpv4"})); + Multilibs.push_back( + Multilib("/a7_hard_neon-vfpv4", {}, {}, + {"+mcpu=cortex-a7", "+mfloat-abi=hard", "+mfpu=neon-vfpv4"})); if (Multilibs.select(Flags, Result.SelectedMultilib)) { Result.Multilibs = Multilibs; diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp index b8de1270b5f02..c42a1de2fd358 100644 --- a/clang/lib/ExtractAPI/DeclarationFragments.cpp +++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp @@ -243,26 +243,30 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType( return Fragments.append(getFragmentsForType(ET->desugar(), Context, After)); } - // Everything we care about has been handled now, reduce to the canonical - // unqualified base type. - QualType Base = T->getCanonicalTypeUnqualified(); - - // Render Objective-C `id`/`instancetype` as keywords. - if (T->isObjCIdType()) - return Fragments.append(Base.getAsString(), - DeclarationFragments::FragmentKind::Keyword); - // If the type is a typedefed type, get the underlying TypedefNameDecl for a // direct reference to the typedef instead of the wrapped type. 
+ + // 'id' type is a typedef for an ObjCObjectPointerType + // we treat it as a typedef if (const TypedefType *TypedefTy = dyn_cast(T)) { const TypedefNameDecl *Decl = TypedefTy->getDecl(); TypedefUnderlyingTypeResolver TypedefResolver(Context); std::string USR = TypedefResolver.getUSRForType(QualType(T, 0)); + + if (T->isObjCIdType()) { + return Fragments.append(Decl->getName(), + DeclarationFragments::FragmentKind::Keyword); + } + return Fragments.append( Decl->getName(), DeclarationFragments::FragmentKind::TypeIdentifier, USR, TypedefResolver.getUnderlyingTypeDecl(QualType(T, 0))); } + // Everything we care about has been handled now, reduce to the canonical + // unqualified base type. + QualType Base = T->getCanonicalTypeUnqualified(); + // If the base type is a TagType (struct/interface/union/class/enum), let's // get the underlying Decl for better names and USRs. if (const TagType *TagTy = dyn_cast(Base)) { diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index c5644c5bfea16..5dbda8fbe0719 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -318,9 +318,10 @@ class AnnotatingParser { // export type X = (...); Contexts.back().IsExpression = false; } else if (OpeningParen.Previous && - (OpeningParen.Previous->isOneOf(tok::kw_static_assert, - tok::kw_while, tok::l_paren, - tok::comma, TT_BinaryOperator) || + (OpeningParen.Previous->isOneOf( + tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit, + tok::kw_while, tok::l_paren, tok::comma, + TT_BinaryOperator) || OpeningParen.Previous->isIf())) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; @@ -1908,7 +1909,8 @@ class AnnotatingParser { } else if (Current.is(tok::arrow) && Style.Language == FormatStyle::LK_Java) { Current.setType(TT_LambdaArrow); - } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && + } else if (Current.is(tok::arrow) && AutoFound && + (Line.MustBeDeclaration || Line.InPPDirective) && Current.NestingLevel == 0 && !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) { // not auto operator->() -> xxx; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 2aae41fe488ff..05d9fc8208b26 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -759,6 +759,8 @@ static StringRef ModuleKindName(Module::ModuleKind MK) { return "Module Map Module"; case Module::ModuleInterfaceUnit: return "Interface Unit"; + case Module::ModuleImplementationUnit: + return "Implementation Unit"; case Module::ModulePartitionInterface: return "Partition Interface"; case Module::ModulePartitionImplementation: diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 8dead93b03734..44c872336ce9c 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -888,23 +888,30 @@ ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent, return Result; } -Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, - StringRef Name) { - assert(LangOpts.CurrentModule == Name && "module name mismatch"); - assert(!Modules[Name] && "redefining existing module"); - +Module *ModuleMap::createModuleUnitWithKind(SourceLocation Loc, StringRef Name, + Module::ModuleKind Kind) { auto *Result = new Module(Name, Loc, nullptr, /*IsFramework*/ false, /*IsExplicit*/ false, NumCreatedModules++); - Result->Kind = Module::ModuleInterfaceUnit; - Modules[Name] = 
SourceModule = Result; + Result->Kind = Kind; - // Reparent the current global module fragment as a submodule of this module. + // Reparent any current global module fragment as a submodule of this module. for (auto &Submodule : PendingSubmodules) { Submodule->setParent(Result); Submodule.release(); // now owned by parent } PendingSubmodules.clear(); + return Result; +} + +Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, + StringRef Name) { + assert(LangOpts.CurrentModule == Name && "module name mismatch"); + assert(!Modules[Name] && "redefining existing module"); + + auto *Result = + createModuleUnitWithKind(Loc, Name, Module::ModuleInterfaceUnit); + Modules[Name] = SourceModule = Result; // Mark the main source file as being within the newly-created module so that // declarations and macros are properly visibility-restricted to it. @@ -915,6 +922,26 @@ Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, return Result; } +Module *ModuleMap::createModuleForImplementationUnit(SourceLocation Loc, + StringRef Name) { + assert(LangOpts.CurrentModule == Name && "module name mismatch"); + // The interface for this implementation must exist and be loaded. + assert(Modules[Name] && Modules[Name]->Kind == Module::ModuleInterfaceUnit && + "creating implementation module without an interface"); + + auto *Result = + createModuleUnitWithKind(Loc, Name, Module::ModuleImplementationUnit); + SourceModule = Result; + + // Mark the main source file as being within the newly-created module so that + // declarations and macros are properly visibility-restricted to it. + auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()); + (void)MainFile; + assert(MainFile && "no input file for module implementation"); + + return Result; +} + Module *ModuleMap::createHeaderUnit(SourceLocation Loc, StringRef Name, Module::Header H) { assert(LangOpts.CurrentModule == Name && "module name mismatch"); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 64034393344f0..dd001dba2b912 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1661,13 +1661,19 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) { if (NewM == OldM) return false; - // Partitions are part of the module, but a partition could import another - // module, so verify that the PMIs agree. - if (NewM && OldM && - (NewM->isModulePartition() || OldM->isModulePartition()) && - NewM->getPrimaryModuleInterfaceName() == - OldM->getPrimaryModuleInterfaceName()) - return false; + if (NewM && OldM) { + // A module implementation unit has visibility of the decls in its + // implicitly imported interface. + if (NewM->isModuleImplementation() && OldM == ThePrimaryInterface) + return false; + + // Partitions are part of the module, but a partition could import another + // module, so verify that the PMIs agree. 
+ if ((NewM->isModulePartition() || OldM->isModulePartition()) && + NewM->getPrimaryModuleInterfaceName() == + OldM->getPrimaryModuleInterfaceName()) + return false; + } bool NewIsModuleInterface = NewM && NewM->isModulePurview(); bool OldIsModuleInterface = OldM && OldM->isModulePurview(); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 229e73618c53c..efa275c0aa12b 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3759,7 +3759,7 @@ static void handleEnumExtensibilityAttr(Sema &S, Decl *D, /// Handle __attribute__((format_arg((idx)))) attribute based on /// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html static void handleFormatArgAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - Expr *IdxExpr = AL.getArgAsExpr(0); + const Expr *IdxExpr = AL.getArgAsExpr(0); ParamIdx Idx; if (!checkFunctionOrMethodParameterIndex(S, D, AL, 1, IdxExpr, Idx)) return; diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 17d8b6c98207b..46517c9dde06a 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -1536,7 +1536,7 @@ void InitListChecker::CheckComplexType(const InitializedEntity &Entity, // the element type of the complex type. The first element initializes // the real part, and the second element intitializes the imaginary part. - if (IList->getNumInits() != 2) + if (IList->getNumInits() < 2) return CheckScalarType(Entity, IList, DeclType, Index, StructuredList, StructuredIndex); diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index 3a82c7b3e8285..64db9d065f9c6 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -390,6 +390,9 @@ buildTypeForLambdaCallOperator(Sema &S, clang::CXXRecordDecl *Class, void Sema::handleLambdaNumbering( CXXRecordDecl *Class, CXXMethodDecl *Method, std::optional> Mangling) { + + ContextRAII ManglingContext(*this, Class->getDeclContext()); + if (Mangling) { bool HasKnownInternalLinkage; unsigned ManglingNumber, DeviceManglingNumber; @@ -1324,8 +1327,6 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, ParamInfo.getDeclSpec().getConstexprSpecifier(), IsLambdaStatic ? SC_Static : SC_None, Params, ExplicitResultType); - ContextRAII ManglingContext(*this, Class->getDeclContext()); - CheckCXXDefaultArguments(Method); // This represents the function body for the lambda function, check if we @@ -1350,8 +1351,6 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, handleLambdaNumbering(Class, Method); - ManglingContext.pop(); - for (auto &&C : LSI->Captures) { if (!C.isVariableCapture()) continue; diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 8c120d278d634..c02b9d2ac25b0 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -298,8 +298,8 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, const_cast(getLangOpts()).CurrentModule = ModuleName; auto &Map = PP.getHeaderSearchInfo().getModuleMap(); - Module *Mod; - + Module *Mod; // The module we are creating. + Module *Interface = nullptr; // The interface for an implementation. switch (MDK) { case ModuleDeclKind::Interface: case ModuleDeclKind::PartitionInterface: { @@ -336,18 +336,19 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // we're building if `LangOpts.CurrentModule` equals to 'ModuleName'. // Change the value for `LangOpts.CurrentModule` temporarily to make the // module loader work properly. 
- const_cast(getLangOpts()).CurrentModule = ""; - Mod = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, - Module::AllVisible, - /*IsInclusionDirective=*/false); + const_cast(getLangOpts()).CurrentModule = ""; + Interface = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, + Module::AllVisible, + /*IsInclusionDirective=*/false); const_cast(getLangOpts()).CurrentModule = ModuleName; - if (!Mod) { + if (!Interface) { Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName; // Create an empty module interface unit for error recovery. Mod = Map.createModuleForInterfaceUnit(ModuleLoc, ModuleName); + } else { + Mod = Map.createModuleForImplementationUnit(ModuleLoc, ModuleName); } - } break; case ModuleDeclKind::PartitionImplementation: @@ -386,19 +387,31 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // statements, so imports are allowed. ImportState = ModuleImportState::ImportAllowed; - // For an implementation, We already made an implicit import (its interface). - // Make and return the import decl to be added to the current TU. - if (MDK == ModuleDeclKind::Implementation) { - // Make the import decl for the interface. - ImportDecl *Import = - ImportDecl::Create(Context, CurContext, ModuleLoc, Mod, Path[0].second); - // and return it to be added. + getASTContext().setNamedModuleForCodeGen(Mod); + + // We already potentially made an implicit import (in the case of a module + // implementation unit importing its interface). Make this module visible + // and return the import decl to be added to the current TU. + if (Interface) { + + VisibleModules.setVisible(Interface, ModuleLoc); + + // Make the import decl for the interface in the impl module. + ImportDecl *Import = ImportDecl::Create(Context, CurContext, ModuleLoc, + Interface, Path[0].second); + CurContext->addDecl(Import); + + // Sequence initialization of the imported module before that of the current + // module, if any. + Context.addModuleInitializer(ModuleScopes.back().Module, Import); + Mod->Imports.insert(Interface); // As if we imported it. + // Also save this as a shortcut to checking for decls in the interface + ThePrimaryInterface = Interface; + // If we made an implicit import of the module interface, then return the + // imported module decl. return ConvertDeclToDeclGroup(Import); } - getASTContext().setNamedModuleForCodeGen(Mod); - - // FIXME: Create a ModuleDecl. return nullptr; } @@ -424,19 +437,17 @@ Sema::ActOnPrivateModuleFragmentDecl(SourceLocation ModuleLoc, Diag(ModuleScopes.back().BeginLoc, diag::note_previous_definition); return nullptr; - case Module::ModuleInterfaceUnit: - break; - } - - if (!ModuleScopes.back().ModuleInterface) { + case Module::ModuleImplementationUnit: Diag(PrivateLoc, diag::err_private_module_fragment_not_module_interface); Diag(ModuleScopes.back().BeginLoc, diag::note_not_module_interface_add_export) << FixItHint::CreateInsertion(ModuleScopes.back().BeginLoc, "export "); return nullptr; + + case Module::ModuleInterfaceUnit: + break; } - // FIXME: Check this isn't a module interface partition. // FIXME: Check that this translation unit does not import any partitions; // such imports would violate [basic.link]/2's "shall be the only module unit" // restriction. 
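Taken together, the ModuleMap.cpp, SemaDecl.cpp, and SemaModule.cpp changes above make a C++20 module implementation unit behave as if it had imported its primary interface: the interface is loaded, made visible, recorded as ThePrimaryInterface, and its initializer is sequenced before that of the implementation unit. A small source-level sketch of the situation these changes address (file and entity names are illustrative only, not from the patch):

    // M.cppm -- primary module interface unit
    export module M;
    export int exported_fn();      // exported: visible to any importer
    int module_linkage_fn();       // module linkage: visible inside module M

    // M.cpp -- module implementation unit of M
    module M;                      // implicitly imports the interface above
    int use_both() {
      // Both declarations are visible here through the implicit import, and
      // redeclaring them here is not treated as a cross-module conflict.
      return exported_fn() + module_linkage_fn();
    }

Per the CGDeclCXX.cpp and CodeGenModule.cpp comments earlier in the patch, the implementation unit's dynamic initializers are then emitted the same way as for a non-modular TU with imports (a filename-based _GLOBAL__sub_I initializer), with the imported interface's initializer run before the implementation unit's own.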
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index e193fa3d19d5c..1cd263b8a5b1c 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2273,10 +2273,10 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, // and alignment, because the runtime library only deals with uintptr types. // If it does not fit the uintptr size, we need to pass the data by reference // instead. - if (!IsByRef && - (Ctx.getTypeSizeInChars(Ty) > - Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) || - Ctx.getDeclAlign(D) > Ctx.getTypeAlignInChars(Ctx.getUIntPtrType()))) { + if (!IsByRef && (Ctx.getTypeSizeInChars(Ty) > + Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) || + Ctx.getAlignOfGlobalVarInChars(Ty) > + Ctx.getTypeAlignInChars(Ctx.getUIntPtrType()))) { IsByRef = true; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index e8f390bc5b1dd..3e40812a9a0ba 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2719,7 +2719,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Kind + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem @@ -3849,12 +3849,6 @@ class ASTDeclContextNameLookupTrait { } // namespace -bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result, - DeclContext *DC) { - return Result.hasExternalDecls() && - DC->hasNeedToReconcileExternalVisibleStorage(); -} - bool ASTWriter::isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC) { for (auto *D : Result.getLookupResult()) @@ -3897,8 +3891,7 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, // don't need to write an entry for the name at all. If we can't // write out a lookup set without performing more deserialization, // just skip this entry. - if (isLookupResultExternal(Result, DC) && - isLookupResultEntirelyExternal(Result, DC)) + if (isLookupResultEntirelyExternal(Result, DC)) continue; // We also skip empty results. 
If any of the results could be external and diff --git a/clang/lib/Testing/TestAST.cpp b/clang/lib/Testing/TestAST.cpp index 8c79fcd7d6363..3a50c2d9b5d05 100644 --- a/clang/lib/Testing/TestAST.cpp +++ b/clang/lib/Testing/TestAST.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/VirtualFileSystem.h" #include "gtest/gtest.h" +#include namespace clang { namespace { @@ -91,7 +92,9 @@ TestAST::TestAST(const TestInputs &In) { Argv.push_back(S.c_str()); for (const auto &S : In.ExtraArgs) Argv.push_back(S.c_str()); - std::string Filename = getFilenameForTesting(In.Language).str(); + std::string Filename = In.FileName; + if (Filename.empty()) + Filename = getFilenameForTesting(In.Language).str(); Argv.push_back(Filename.c_str()); Clang->setInvocation(std::make_unique()); if (!CompilerInvocation::CreateFromArgs(Clang->getInvocation(), Argv, diff --git a/clang/test/CXX/module/basic/basic.def.odr/p4.cppm b/clang/test/CXX/module/basic/basic.def.odr/p4.cppm index 1542e532c635a..487dbdef283ee 100644 --- a/clang/test/CXX/module/basic/basic.def.odr/p4.cppm +++ b/clang/test/CXX/module/basic/basic.def.odr/p4.cppm @@ -143,9 +143,6 @@ void use() { (void)&inline_var_exported; (void)&const_var_exported; - // CHECK: define {{.*}}@_ZL26used_static_module_linkagev - used_static_module_linkage(); - // CHECK: define linkonce_odr {{.*}}@_ZW6Module26used_inline_module_linkagev used_inline_module_linkage(); @@ -154,8 +151,12 @@ void use() { (void)&extern_var_module_linkage; (void)&inline_var_module_linkage; + + // FIXME: Issue #61427 Internal-linkage declarations in the interface TU + // should not be not visible here. (void)&static_var_module_linkage; // FIXME: Should not be visible here. - (void)&const_var_module_linkage; + + (void)&const_var_module_linkage; // FIXME: will be visible after P2788R0 } //--- user.cpp @@ -176,5 +177,6 @@ void use() { (void)&inline_var_exported; (void)&const_var_exported; + // Internal-linkage declarations are not visible here. // Module-linkage declarations are not visible here. } diff --git a/clang/test/CXX/module/basic/basic.link/p2.cppm b/clang/test/CXX/module/basic/basic.link/p2.cppm index e04412ea08d4a..19761fb3359ce 100644 --- a/clang/test/CXX/module/basic/basic.link/p2.cppm +++ b/clang/test/CXX/module/basic/basic.link/p2.cppm @@ -39,19 +39,21 @@ void use() { } //--- M.cpp -// expected-no-diagnostics + module M; -// FIXME: Use of internal linkage entities should be rejected. void use_from_module_impl() { external_linkage_fn(); module_linkage_fn(); - internal_linkage_fn(); + internal_linkage_fn(); // expected-error {{no matching function for call to 'internal_linkage_fn'}} (void)external_linkage_class{}; (void)module_linkage_class{}; - (void)internal_linkage_class{}; (void)external_linkage_var; (void)module_linkage_var; + + // FIXME: Issue #61427 Internal-linkage declarations in the interface TU + // should not be not visible here. 
+ (void)internal_linkage_class{}; (void)internal_linkage_var; } diff --git a/clang/test/CodeGen/atomics-inlining.c b/clang/test/CodeGen/atomics-inlining.c index ade0e3d75bcb0..862c63076b2dc 100644 --- a/clang/test/CodeGen/atomics-inlining.c +++ b/clang/test/CodeGen/atomics-inlining.c @@ -8,6 +8,7 @@ // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS64 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV8 -check-prefix=SPARC // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV9 -check-prefix=SPARC +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s -check-prefix=NVPTX unsigned char c1, c2; unsigned short s1, s2; @@ -109,4 +110,17 @@ void test1(void) { // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2 + +// NVPTX-LABEL: define{{.*}} void @test1 +// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1 +// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1 +// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2 +// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2 +// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4 +// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4 +// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8 +// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8 +// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5) +// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5) + } diff --git a/clang/test/CodeGen/builtin-assume-separate-storage.c b/clang/test/CodeGen/builtin-assume-separate-storage.c new file mode 100644 index 0000000000000..ac82f27b3e720 --- /dev/null +++ b/clang/test/CodeGen/builtin-assume-separate-storage.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s +void *nonconst(void); + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "separate_storage"(ptr [[TMP0]], ptr [[TMP1]]) ] +// CHECK-NEXT: ret void +// +void test1(int *a, int *b) { + + __builtin_assume_separate_storage(a, b); +} + +// Separate storage assumptions evaluate their arguments unconditionally, like +// assume_aligned but *unlike* assume. Check that we actually do so. 
+// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call ptr @nonconst() +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "separate_storage"(ptr [[TMP0]], ptr [[CALL]]) ] +// CHECK-NEXT: ret void +// +void test2(int *a, int *b) { + __builtin_assume_separate_storage(a, nonconst()); +} diff --git a/clang/test/CodeGenCXX/module-intializer.cpp b/clang/test/CodeGenCXX/module-intializer.cpp index e5149401b467a..d365d180ac59d 100644 --- a/clang/test/CodeGenCXX/module-intializer.cpp +++ b/clang/test/CodeGenCXX/module-intializer.cpp @@ -18,17 +18,17 @@ // RUN: -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-P // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M.cpp \ -// RUN: -fmodule-file=N.pcm -fmodule-file=O.pcm -fmodule-file=M-part.pcm \ +// RUN: -fmodule-file=N=N.pcm -fmodule-file=O=O.pcm -fmodule-file=M:Part=M-part.pcm \ // RUN: -emit-module-interface -o M.pcm // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M.pcm -S -emit-llvm \ // RUN: -o - | FileCheck %s --check-prefix=CHECK-M // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 useM.cpp \ -// RUN: -fmodule-file=M.pcm -S -emit-llvm -o - \ +// RUN: -fmodule-file=M=M.pcm -S -emit-llvm -o - \ // RUN: | FileCheck %s --check-prefix=CHECK-USE // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M-impl.cpp \ -// RUN: -fmodule-file=M.pcm -S -emit-llvm -o - \ +// RUN: -fmodule-file=M=M.pcm -S -emit-llvm -o - \ // RUN: | FileCheck %s --check-prefix=CHECK-IMPL // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 N.cpp -S -emit-llvm \ @@ -41,7 +41,7 @@ // RUN: -o - | FileCheck %s --check-prefix=CHECK-P // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M.cpp \ -// RUN: -fmodule-file=N.pcm -fmodule-file=O.pcm -fmodule-file=M-part.pcm \ +// RUN: -fmodule-file=N.pcm -fmodule-file=O=O.pcm -fmodule-file=M:Part=M-part.pcm \ // RUN: -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-M //--- N-h.h diff --git a/clang/test/Driver/android-ndk-standalone.cpp b/clang/test/Driver/android-ndk-standalone.cpp index aeb2678c9caae..397460dbd7803 100644 --- a/clang/test/Driver/android-ndk-standalone.cpp +++ b/clang/test/Driver/android-ndk-standalone.cpp @@ -246,22 +246,6 @@ // CHECK-ARM64: "-L{{.*}}/lib/gcc/aarch64-linux-android/4.9/../../../../aarch64-linux-android/lib" // // RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android21 \ -// RUN: -mips32 \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_ndk_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_ndk_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPS %s -// CHECK-MIPS: "-cc1" -// CHECK-MIPS: "-internal-isystem" "{{.*}}/include/c++/v1" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPS: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.9" -// CHECK-MIPS: "-L{{.*}}/sysroot/usr/lib/mipsel-linux-android/21" -// CHECK-MIPS: "-L{{.*}}/sysroot/usr/lib/mipsel-linux-android" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.9/../../../../mipsel-linux-android/lib" -// -// RUN: %clang -### %s 2>&1 \ // RUN: --target=i686-linux-android21 \ // RUN: 
--gcc-toolchain=%S/Inputs/basic_android_ndk_tree \ // RUN: --sysroot=%S/Inputs/basic_android_ndk_tree/sysroot \ diff --git a/clang/test/Driver/android-pie.c b/clang/test/Driver/android-pie.c index c006b90891e89..8620e18565458 100644 --- a/clang/test/Driver/android-pie.c +++ b/clang/test/Driver/android-pie.c @@ -8,11 +8,6 @@ // RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-android24 \ // RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android \ -// RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android24 \ -// RUN: | FileCheck --check-prefix=PIE %s - // RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android \ // RUN: | FileCheck --check-prefix=PIE %s // RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android24 \ @@ -28,11 +23,6 @@ // RUN: %clang %s -### -o %t.o 2>&1 --target=arm64-linux-android24 \ // RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mips64el-linux-android \ -// RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mips64el-linux-android24 \ -// RUN: | FileCheck --check-prefix=PIE %s - // RUN: %clang %s -### -o %t.o 2>&1 --target=x86_64-linux-android \ // RUN: | FileCheck --check-prefix=PIE %s // RUN: %clang %s -### -o %t.o 2>&1 --target=x86_64-linux-android24 \ diff --git a/clang/test/Driver/android-standalone.cpp b/clang/test/Driver/android-standalone.cpp index 7363497c880a5..0246d1371deb9 100644 --- a/clang/test/Driver/android-standalone.cpp +++ b/clang/test/Driver/android-standalone.cpp @@ -45,51 +45,3 @@ // CHECK-ARM64: "-L{{.*}}/lib/gcc/aarch64-linux-android/4.8" // CHECK-ARM64: "-L{{.*}}/lib/gcc/aarch64-linux-android/4.8/../../../../aarch64-linux-android/lib" // CHECK-ARM64: "-L{{.*}}/sysroot/usr/lib" -// -// RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: -mips32 -stdlib=libstdc++ \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPS %s -// CHECK-MIPS: "-cc1" -// CHECK-MIPS: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3" -// CHECK-MIPS: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3/mipsel-linux-android" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPS: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/../../../../mipsel-linux-android/lib" -// CHECK-MIPS: "-L{{.*}}/sysroot/usr/lib" -// -// RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: -march=mips32 -mips32r2 -stdlib=libstdc++ \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPSR2 %s -// CHECK-MIPSR2: "-cc1" -// CHECK-MIPSR2: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3" -// CHECK-MIPSR2: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3/mipsel-linux-android" -// CHECK-MIPSR2: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPSR2: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPSR2: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPSR2: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/mips-r2" -// 
CHECK-MIPSR2: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/../../../../mipsel-linux-android/lib" -// CHECK-MIPSR2: "-L{{.*}}/sysroot/usr/lib" -// -// RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: -mips32 -march=mips32r2 -stdlib=libstdc++ \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPSR2-A %s -// CHECK-MIPSR2-A: "-cc1" -// CHECK-MIPSR2-A: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3" -// CHECK-MIPSR2-A: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3/mipsel-linux-android" -// CHECK-MIPSR2-A: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPSR2-A: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPSR2-A: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPSR2-A: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/mips-r2" -// CHECK-MIPSR2-A: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/../../../../mipsel-linux-android/lib" -// CHECK-MIPSR2-A: "-L{{.*}}/sysroot/usr/lib" diff --git a/clang/test/Driver/avr-ld.c b/clang/test/Driver/avr-ld.c index 3088bc00446f4..4042ecb89adf5 100644 --- a/clang/test/Driver/avr-ld.c +++ b/clang/test/Driver/avr-ld.c @@ -44,16 +44,16 @@ // RUN: %clang -### --target=avr -mmcu=atxmega128a1 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKO %s // LINKO: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega7"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x802000" "--start-group" {{.*}} "-latxmega128a1" {{.*}} "--end-group" "--relax" "-mavrxmega7" -// RUN: %clang -### --target=avr -mmcu=atmega328 -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKP %s +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=ld -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck --check-prefix=LINKP %s // LINKP: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" -// RUN: %clang -### --target=avr -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKQ %s +// RUN: %clang -### --target=avr -fuse-ld=ld -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck --check-prefix=LINKQ %s // LINKQ: {{".*ld.*"}} {{.*}} "-plugin" // LINKQ-NOT: "-plugin-opt=mcpu" -// RUN: %clang -### --target=avr -mmcu=atmega328 -flto=thin --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKR %s -// LINKR: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" "-plugin-opt=thinlto" +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=lld -flto=thin --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKR %s +// LINKR: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin-opt=mcpu=atmega328" "-plugin-opt=thinlto" -// RUN: %clang -### --target=avr -mmcu=atmega328 -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKS %s -// LINKS: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=lld -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKS %s +// LINKS: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin-opt=mcpu=atmega328" // LINKS-NOT: "-plugin-opt=thinlto" diff --git a/clang/test/Driver/clang-translation.c b/clang/test/Driver/clang-translation.c index 
ca98ca5e8228d..d950d9a4de9be 100644 --- a/clang/test/Driver/clang-translation.c +++ b/clang/test/Driver/clang-translation.c @@ -392,24 +392,6 @@ // MIPSR6EL: "-target-cpu" "mips32r6" // MIPSR6EL: "-mfloat-abi" "hard" -// RUN: %clang -target mipsel-linux-android -### -S %s 2>&1 | \ -// RUN: FileCheck -check-prefix=MIPSEL-ANDROID %s -// MIPSEL-ANDROID: clang -// MIPSEL-ANDROID: "-cc1" -// MIPSEL-ANDROID: "-target-cpu" "mips32" -// MIPSEL-ANDROID: "-target-feature" "+fpxx" -// MIPSEL-ANDROID: "-target-feature" "+nooddspreg" -// MIPSEL-ANDROID: "-mfloat-abi" "hard" - -// RUN: %clang -target mipsel-linux-android -### -S %s -mcpu=mips32r6 2>&1 | \ -// RUN: FileCheck -check-prefix=MIPSEL-ANDROID-R6 %s -// MIPSEL-ANDROID-R6: clang -// MIPSEL-ANDROID-R6: "-cc1" -// MIPSEL-ANDROID-R6: "-target-cpu" "mips32r6" -// MIPSEL-ANDROID-R6: "-target-feature" "+fp64" -// MIPSEL-ANDROID-R6: "-target-feature" "+nooddspreg" -// MIPSEL-ANDROID-R6: "-mfloat-abi" "hard" - // RUN: %clang -target mips64-linux-gnu -### -S %s 2>&1 | \ // RUN: FileCheck -check-prefix=MIPS64 %s // MIPS64: clang @@ -501,10 +483,3 @@ // MIPSN32R6EL: "-target-cpu" "mips64r6" // MIPSN32R6EL: "-target-abi" "n32" // MIPSN32R6EL: "-mfloat-abi" "hard" - -// RUN: %clang -target mips64el-linux-android -### -S %s 2>&1 | \ -// RUN: FileCheck -check-prefix=MIPS64EL-ANDROID %s -// MIPS64EL-ANDROID: clang -// MIPS64EL-ANDROID: "-cc1" -// MIPS64EL-ANDROID: "-target-cpu" "mips64r6" -// MIPS64EL-ANDROID: "-mfloat-abi" "hard" diff --git a/clang/test/Driver/constructors.c b/clang/test/Driver/constructors.c index f844e80a5450f..f210ad512f270 100644 --- a/clang/test/Driver/constructors.c +++ b/clang/test/Driver/constructors.c @@ -50,12 +50,6 @@ // RUN: | FileCheck --check-prefix=CHECK-INIT-ARRAY %s // // RUN: %clang -### %s -fsyntax-only 2>&1 \ -// RUN: --target=mipsel-unknown-linux-android \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: --gcc-toolchain="" \ -// RUN: | FileCheck --check-prefix=CHECK-INIT-ARRAY %s -// -// RUN: %clang -### %s -fsyntax-only 2>&1 \ // RUN: --target=i386-unknown-linux-android \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: --gcc-toolchain="" \ diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index 10e6bee183050..27786dce67cc6 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -1046,16 +1046,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1101,18 +1091,6 @@ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-SO %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck 
--check-prefix=CHECK-ANDROID-SO %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-SO %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1158,18 +1136,6 @@ // RUN: -static \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -static \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -static \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1216,18 +1182,6 @@ // RUN: -pie \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-PIE %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -pie \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PIE %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -pie \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PIE %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1259,11 +1213,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-32 %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-32 %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=aarch64-linux-android \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1274,11 +1223,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-64 %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-64 %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1313,15 +1257,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -pthread \ -// RUN: 
--sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -pthread \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -pthread \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1356,18 +1291,6 @@ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -pthread \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -pthread \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -pthread \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ diff --git a/clang/test/Driver/pic.c b/clang/test/Driver/pic.c index b05f363603a2a..daa3a55430068 100644 --- a/clang/test/Driver/pic.c +++ b/clang/test/Driver/pic.c @@ -280,9 +280,6 @@ // RUN: %clang -c %s -target arm-linux-androideabi24 -### 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-PIE2 // -// RUN: %clang -c %s -target mipsel-linux-android24 -### 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-PIE1 -// // 64-bit Android targets are always PIE. // RUN: %clang -c %s -target aarch64-linux-android -### 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-PIE2 diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 610f79d64ada2..b13da106df778 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -198,11 +198,6 @@ // Testing specific messages and unsupported extensions. 
-// RUN: %clang --target=riscv64-unknown-elf -march=rv64e -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64E %s -// RV64E: error: invalid arch name 'rv64e', -// RV64E: standard user-level extension 'e' requires 'rv32' - // RUN: %clang --target=riscv32-unknown-elf -march=rv32imC -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-LOWER %s // RV32-LOWER: error: invalid arch name 'rv32imC', @@ -211,7 +206,7 @@ // RUN: %clang --target=riscv32-unknown-elf -march=unknown -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-STR %s // RV32-STR: error: invalid arch name 'unknown', -// RV32-STR: string must begin with rv32{i,e,g} or rv64{i,g} +// RV32-STR: string must begin with rv32{i,e,g} or rv64{i,e,g} // RUN: %clang --target=riscv32-unknown-elf -march=rv32q -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-LETTER %s @@ -223,11 +218,6 @@ // RV32-ORDER: error: invalid arch name 'rv32imcq', // RV32-ORDER: standard user-level extension not given in canonical order 'q' -// RUN: %clang --target=riscv32-unknown-elf -march=rv64e -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64-EER %s -// RV64-EER: error: invalid arch name 'rv64e', -// RV64-EER: standard user-level extension 'e' requires 'rv32' - // RUN: %clang --target=riscv32-unknown-elf -march=rv32izve32f -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ZVE32F-ER %s // RV32-ZVE32F-ER: error: invalid arch name 'rv32izve32f', diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c index 98445b1920301..b189fdeacec8c 100644 --- a/clang/test/Driver/riscv-features.c +++ b/clang/test/Driver/riscv-features.c @@ -33,10 +33,6 @@ // DEFAULT-LINUX-SAME: "-target-feature" "+d" // DEFAULT-LINUX-SAME: "-target-feature" "+c" -// RUN: not %clang -cc1 -triple riscv64-unknown-elf -target-feature +e 2>&1 | FileCheck %s -check-prefix=RV64-WITH-E - -// RV64-WITH-E: error: invalid feature combination: standard user-level extension 'e' requires 'rv32' - // RUN: not %clang -c --target=riscv64-linux-gnu -gsplit-dwarf %s 2>&1 | FileCheck %s --check-prefix=ERR-SPLIT-DWARF // RUN: not %clang -c --target=riscv64 -gsplit-dwarf=single %s 2>&1 | FileCheck %s --check-prefix=ERR-SPLIT-DWARF // RUN: %clang -### -c --target=riscv64 -mno-relax -g -gsplit-dwarf %s 2>&1 | FileCheck %s --check-prefix=SPLIT-DWARF diff --git a/clang/test/ExtractAPI/objc_instancetype.m b/clang/test/ExtractAPI/objc_instancetype.m new file mode 100644 index 0000000000000..1680fe9336cf3 --- /dev/null +++ b/clang/test/ExtractAPI/objc_instancetype.m @@ -0,0 +1,254 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ + // RUN: %t/reference.output.json.in >> %t/reference.output.json +// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx -x objective-c-header %t/input.h -o %t/output.json -verify + +// Generator version is not consistent across test runs, normalize it. +// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ + // RUN: %t/output.json >> %t/output-normalized.json +// RUN: diff %t/reference.output.json %t/output-normalized.json + + +//--- input.h +@interface Foo +- (instancetype) init; +- (id) reset; +@end +// expected-no-diagnostics + + +//--- reference.output.json.in +{ + "metadata": { + "formatVersion": { + "major": 0, + "minor": 5, + "patch": 3 + }, + "generator": "?" 
+ }, + "module": { + "name": "", + "platform": { + "architecture": "arm64", + "operatingSystem": { + "minimumVersion": { + "major": 11, + "minor": 0, + "patch": 0 + }, + "name": "macosx" + }, + "vendor": "apple" + } + }, + "relationships": [ + { + "kind": "memberOf", + "source": "c:objc(cs)Foo(im)init", + "target": "c:objc(cs)Foo", + "targetFallback": "Foo" + }, + { + "kind": "memberOf", + "source": "c:objc(cs)Foo(im)reset", + "target": "c:objc(cs)Foo", + "targetFallback": "Foo" + } + ], + "symbols": [ + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "@interface" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "Foo" + } + ], + "identifier": { + "interfaceLanguage": "objective-c", + "precise": "c:objc(cs)Foo" + }, + "kind": { + "displayName": "Class", + "identifier": "objective-c.class" + }, + "location": { + "position": { + "character": 12, + "line": 1 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "Foo" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "Foo" + } + ], + "title": "Foo" + }, + "pathComponents": [ + "Foo" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "text", + "spelling": "- (" + }, + { + "kind": "keyword", + "spelling": "instancetype" + }, + { + "kind": "text", + "spelling": ") " + }, + { + "kind": "identifier", + "spelling": "init" + }, + { + "kind": "text", + "spelling": ";" + } + ], + "functionSignature": { + "returns": [ + { + "kind": "keyword", + "spelling": "instancetype" + } + ] + }, + "identifier": { + "interfaceLanguage": "objective-c", + "precise": "c:objc(cs)Foo(im)init" + }, + "kind": { + "displayName": "Instance Method", + "identifier": "objective-c.method" + }, + "location": { + "position": { + "character": 1, + "line": 2 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "init" + } + ], + "subHeading": [ + { + "kind": "text", + "spelling": "- " + }, + { + "kind": "identifier", + "spelling": "init" + } + ], + "title": "init" + }, + "pathComponents": [ + "Foo", + "init" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "text", + "spelling": "- (" + }, + { + "kind": "keyword", + "spelling": "id" + }, + { + "kind": "text", + "spelling": ") " + }, + { + "kind": "identifier", + "spelling": "reset" + }, + { + "kind": "text", + "spelling": ";" + } + ], + "functionSignature": { + "returns": [ + { + "kind": "keyword", + "spelling": "id" + } + ] + }, + "identifier": { + "interfaceLanguage": "objective-c", + "precise": "c:objc(cs)Foo(im)reset" + }, + "kind": { + "displayName": "Instance Method", + "identifier": "objective-c.method" + }, + "location": { + "position": { + "character": 1, + "line": 3 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "reset" + } + ], + "subHeading": [ + { + "kind": "text", + "spelling": "- " + }, + { + "kind": "identifier", + "spelling": "reset" + } + ], + "title": "reset" + }, + "pathComponents": [ + "Foo", + "reset" + ] + } + ] +} diff --git a/clang/test/Modules/pr61065.cppm b/clang/test/Modules/pr61065.cppm new file mode 100644 index 0000000000000..44fa3679974ad --- /dev/null +++ b/clang/test/Modules/pr61065.cppm @@ -0,0 +1,55 @@ +// From https://github.com/llvm/llvm-project/issues/61065 +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// 
RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + +//--- a.cppm +export module a; + +struct base { + base(int) {} +}; + +export struct a : base { + using base::base; +}; + +//--- b.cppm +export module b; + +import a; + +a b() { + return a(1); +} + +//--- c.cppm +export module c; + +import a; +import b; + +struct noncopyable { + noncopyable(noncopyable const &) = delete; + noncopyable() = default; +}; + +export struct c { + noncopyable c0; + a c1 = 43; + c() = default; +}; + +//--- d.cpp +// expected-no-diagnostics +import c; +void d() { + c _; +} diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c new file mode 100644 index 0000000000000..e33ad0b353f51 --- /dev/null +++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c @@ -0,0 +1,305 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// REQUIRES: amdgpu-registered-target + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-amd.bc +// RUN: %clang_cc1 -verify -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-amd.bc -o - | FileCheck %s --check-prefix=CHECK-AMD + + +void write_to_aligned_array(int *a, int N) { + int *aptr __attribute__ ((aligned(64))) = a; + #pragma omp target teams distribute parallel for map(tofrom: aptr[0:N]) + for(int i = 0; i < N; i++) { + aptr[i] = i; + } +} + +#endif +// CHECK-AMD-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14 +// CHECK-AMD-SAME: (i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-AMD-NEXT: entry: +// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// CHECK-AMD-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) 
+// CHECK-AMD-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-AMD: user_code.entry: +// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// CHECK-AMD-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]] +// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-AMD-NEXT: ret void +// CHECK-AMD: worker.exit: +// CHECK-AMD-NEXT: ret void +// +// +// CHECK-AMD-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-AMD-NEXT: entry: +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr +// CHECK-AMD-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-AMD-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr +// CHECK-AMD-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// CHECK-AMD-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-AMD: omp.precond.then: +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-AMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK-AMD-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-AMD: cond.true: +// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END:%.*]] +// CHECK-AMD: cond.false: +// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END]] +// CHECK-AMD: cond.end: +// CHECK-AMD-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-AMD-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-AMD: omp.inner.for.cond: +// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-AMD-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP11]], [[ADD]] +// CHECK-AMD-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-AMD: omp.inner.for.body: +// CHECK-AMD-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-AMD-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP17]], ptr [[N_CASTED_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP19:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK-AMD-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK-AMD-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK-AMD-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// CHECK-AMD-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP16]] to ptr +// CHECK-AMD-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK-AMD-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// CHECK-AMD-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP18]] to ptr +// CHECK-AMD-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK-AMD-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// CHECK-AMD-NEXT: store ptr [[TMP19]], ptr [[TMP26]], align 8 +// CHECK-AMD-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-AMD: omp.inner.for.inc: +// CHECK-AMD-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK-AMD-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK-AMD-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: 
[[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK-AMD-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK-AMD: cond.true10: +// CHECK-AMD-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END12:%.*]] +// CHECK-AMD: cond.false11: +// CHECK-AMD-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END12]] +// CHECK-AMD: cond.end12: +// CHECK-AMD-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK-AMD-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-AMD: omp.inner.for.end: +// CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-AMD: omp.loop.exit: +// CHECK-AMD-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP41]]) +// CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-AMD: omp.precond.end: +// CHECK-AMD-NEXT: ret void +// +// +// CHECK-AMD-LABEL: define {{[^@]+}}@__omp_outlined__.1 +// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1]] { +// CHECK-AMD-NEXT: entry: +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr +// CHECK-AMD-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-AMD-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr +// CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-AMD: omp.precond.then: +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-AMD-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP7]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-AMD: omp.inner.for.cond: +// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CONV5:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP10]] +// CHECK-AMD-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-AMD: omp.inner.for.body: +// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-AMD-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP13:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK-AMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK-AMD-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-AMD: omp.body.continue: +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-AMD: omp.inner.for.inc: +// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-AMD: omp.inner.for.end: +// CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-AMD: omp.loop.exit: +// CHECK-AMD-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP18]]) +// CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-AMD: omp.precond.end: +// CHECK-AMD-NEXT: ret void +// diff --git a/clang/test/OpenMP/declare_target_constexpr_codegen.cpp b/clang/test/OpenMP/declare_target_constexpr_codegen.cpp new file mode 100644 index 0000000000000..27161feef05e0 --- /dev/null +++ b/clang/test/OpenMP/declare_target_constexpr_codegen.cpp @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --prefix-filecheck-ir-name _ --global-value-regex "llvm.compiler.used" "_[0-9a-zA-Z]+A[0-9a-zA-Z]+pi[0-9a-zA-Z]+" "_[0-9a-zA-Z]+anotherPi" --version 2 +// REQUIRES: 
amdgpu-registered-target + + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#pragma omp declare target +class A { +public: + static constexpr double pi = 3.141592653589793116; +//. +// CHECK: @_ZN1A2piE = linkonce_odr constant double 0x400921FB54442D18, comdat, align 8 +// CHECK: @_ZL9anotherPi = internal constant double 3.140000e+00, align 8 +// CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr @"__ZN1A2piE$ref", ptr @"__ZL9anotherPi$ref"], section "llvm.metadata" +//. + A() { ; } + ~A() { ; } +}; +#pragma omp end declare target + +void F(const double &); +void Test() { F(A::pi); } + +#pragma omp declare target +constexpr static double anotherPi = 3.14; +#pragma omp end declare target + +#endif + + +// +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp index c5c31c601ed39..010cbae25b9af 100644 --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -45,7 +45,7 @@ int main() { // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) @@ -78,7 +78,7 @@ int main() { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -111,7 +111,7 @@ int main() { // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -119,19 +119,9 @@ int main() { // CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 
8 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] -// CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] -// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK1: atomic_exit: +// CHECK1-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4 // CHECK1-NEXT: ret void // // @@ -165,7 +155,7 @@ int main() { // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) @@ -198,7 +188,7 @@ int main() { // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -231,7 +221,7 @@ int main() { // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -239,19 +229,9 @@ int main() { // CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] -// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] -// CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK2: atomic_exit: +// CHECK2-NEXT: [[TMP1:%.*]] = 
atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4 // CHECK2-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp index 2c70e05feafd0..845888dd32d19 100644 --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -467,6 +467,8 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 @@ -476,23 +478,29 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[T_VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], i32 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP3]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -713,67 +721,63 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca 
[[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 // CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label 
[[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i32 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i32 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: // CHECK1-NEXT: ret void // // @@ -803,18 +807,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK1-NEXT: ret void // // @@ -1123,27 +1123,23 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1193,33 +1189,33 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK4-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr @g, i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 +// CHECK4-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_CASTED]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i32 [[TMP1]], i32 [[TMP3]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK4-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK4-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK4-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK4-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 @@ -1232,14 +1228,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 16 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP2]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK4-NEXT: call void [[TMP5]](ptr [[BLOCK]]) +// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK4-NEXT: call void [[TMP3]](ptr [[BLOCK]]) // 
CHECK4-NEXT: ret void // // @@ -1675,6 +1671,8 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 @@ -1684,23 +1682,29 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[T_VAR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], i64 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP3]]) // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done1: +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP1]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP5]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1921,67 +1925,63 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca 
[[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label 
[[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // @@ -2011,18 +2011,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2331,27 +2327,23 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK11-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK11-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK11-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK11-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK11-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// 
CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 8 // CHECK11-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2401,33 +2393,33 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK12-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr @g, i64 [[TMP1]]) +// CHECK12-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 +// CHECK12-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP3]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, align 128 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK12-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK12-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK12-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK12-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK12-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 @@ -2440,14 +2432,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK12-NEXT: store volatile i32 [[TMP2]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK12-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED2]], align 32 -// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK12-NEXT: call void [[TMP5]](ptr [[BLOCK]]) +// CHECK12-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK12-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 32 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK12-NEXT: 
call void [[TMP3]](ptr [[BLOCK]]) // CHECK12-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp index a120b8bfb1904..d316ee8b3e411 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s @@ -7,11 +8,10 @@ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY4 %s // expected-no-diagnostics #ifndef ARRAY @@ -30,15 +30,6 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } -// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } -// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] -// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]* } -// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } -// 
CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } -// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } template T tmain() { S ttt; @@ -58,48 +49,14 @@ T tmain() { int main() { static int sivar; #ifdef LAMBDA - // LAMBDA: [[G:@.+]] ={{.*}} global double - // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // LAMBDA-LABEL: @main - // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( -// LAMBDA: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// LAMBDA-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// LAMBDA-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// LAMBDA: [[THEN]] -// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, -// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - -// LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, -// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) -// LAMBDA: call {{.*}}void @__kmpc_end_master( -// LAMBDA-NEXT: br label {{%?}}[[EXIT]] -// LAMBDA: [[EXIT]] -// LAMBDA: ret + + #pragma omp parallel master taskloop firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* {{[^,]*}} [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: store double* %{{.+}}, double** %{{.+}}, - // LAMBDA: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, - // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // LAMBDA: call void [[INNER_LAMBDA]](% - // LAMBDA: ret [&]() { g = 2; sivar = 22; @@ -108,51 +65,13 @@ int main() { }(); return 0; #elif defined(BLOCKS) - // BLOCKS: [[G:@.+]] ={{.*}} global double - // BLOCKS: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // BLOCKS-LABEL: @main - // BLOCKS: call void {{%.+}}(i8 ^{ - // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( - // BLOCKS-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 - // BLOCKS-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] - // BLOCKS: [[THEN]] - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast 
(i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) - // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 - // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, - // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - - // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, - // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) - // BLOCKS: call {{.*}}void @__kmpc_end_master( - // BLOCKS-NEXT: br label {{%?}}[[EXIT]] - // BLOCKS: [[EXIT]] - // BLOCKS: ret + #pragma omp parallel master taskloop firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // BLOCKS: define {{.+}} void {{@.+}}(i8* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store double 2.0{{.+}}, double* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: ret - - // BLOCKS: store double* %{{.+}}, double** %{{.+}}, - // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, - // BLOCKS: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: call void {{%.+}}(i8 ^{ g = 2; sivar = 22; @@ -177,303 +96,86 @@ int main() { #endif } -// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, -// CHECK: define{{.*}} i{{[0-9]+}} @main() -// CHECK: alloca [[S_DOUBLE_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], - -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* {{[^,]*}} [[TEST]], - -// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* %{{.+}}, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // Constructors for s_arr and var. // s_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: bitcast [2 x [[S_DOUBLE_TY]]]* %{{.+}} to [[S_DOUBLE_TY]]* -// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}, // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK-NEXT: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, -// CHECK-NEXT: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64( // sivar; -// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 -// CHECK-NEXT: [[SIVAR:%.+]] = load i32, i32* @{{.+}}, -// CHECK-NEXT: store i32 [[SIVAR]], i32* [[PRIVATE_SIVAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) -// CHECK: call {{.*}}void @__kmpc_end_master( -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias noundef %0, [[S_DOUBLE_TY]]** noalias noundef %1, i32** noalias noundef %2, [2 x [[S_DOUBLE_TY]]]** noalias noundef %3, [2 x i32]** noalias noundef %4, i32** noalias noundef %5) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, -// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], -// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 -// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, -// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_MAIN_TY]]* noalias noundef %1) - -// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, -// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, -// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], - -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) - -// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + + + + + // Privates actually are used. 
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] -// CHECK-DAG: [[PRIV_SIVAR]] - -// CHECK: ret - -// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* noundef %0, [[KMP_TASK_MAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_DOUBLE_TY]]* -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_DOUBLE_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %{{.+}}, [[KMP_TASK_MAIN_TY]]* noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 - -// CHECK: alloca [[S_INT_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], - -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[TEST]], + + + + + + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_INT_TY]]]* %{{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_INT_TY]]* %{{.+}}, [[S_INT_TY]]** [[VAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { // [[KMP_TASK_T_TY]] task_data; // [[KMP_TASK_TMAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, align 128 -// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], align 128 // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( // Constructors for s_arr and var. // a_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: bitcast [2 x [[S_INT_TY]]]* %{{.+}} to [[S_INT_TY]]* -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias noundef %{{.+}}, i32** noalias noundef %{{.+}}, [2 x i32]** noalias noundef %{{.+}}, [2 x [[S_INT_TY]]]** noalias noundef %{{.+}}, [[S_INT_TY]]** noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, -// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: alloca i32*, -// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + + // Privates actually are used. 
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] - -// CHECK: ret - -// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* noundef %0, [[KMP_TASK_TMAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_INT_TY]]* -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_INT_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 + + + + + #endif #else -// ARRAY-LABEL: array_func struct St { int a, b; St() : a(0), b(0) {} @@ -482,13 +184,2207 @@ struct St { }; void array_func(int n, float a[n], St s[2]) { -// ARRAY: call i8* @__kmpc_omp_task_alloc( -// ARRAY: call void @__kmpc_taskloop( -// ARRAY: store float** %{{.+}}, float*** %{{.+}}, -// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, #pragma omp parallel master taskloop firstprivate(a, s) for (int i = 0; i < 10; ++i) ; } #endif +// CHECK-LABEL: define {{[^@]+}}@main +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// 
CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void 
@_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP19]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP2]], double noundef 0.000000e+00) +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP22]] to i8* +// CHECK-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 8, i1 false) +// CHECK-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 4 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// CHECK-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], %struct.S** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x %struct.S]** noalias noundef [[TMP3:%.*]], [2 x i32]** noalias noundef [[TMP4:%.*]], i32** noalias noundef [[TMP5:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.S**, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR5:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.S** [[TMP1]], %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S]** [[TMP3]], [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP4]], [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store i32** [[TMP5]], i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load [2 x %struct.S]**, [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP7]], [2 x %struct.S]** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = load %struct.S**, %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.S* [[TMP9]], %struct.S** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2 +// CHECK-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store i32* [[TMP11]], i32** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3 +// CHECK-NEXT: [[TMP14:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP13]], [2 x i32]** [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: store i32* [[TMP15]], i32** [[TMP16]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry. 
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP26:%.*]] = load %struct.S*, %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP29:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP31]] to i32 +// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// CHECK: omp.inner.for.cond.i: +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, 
!noalias !14 +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP33]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// CHECK: omp.inner.for.body.i: +// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: store i32 [[TMP34]], i32* [[I_I]], align 4, !noalias !14 +// CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP29]], i64 0, i64 0 +// CHECK-NEXT: store i32 [[TMP35]], i32* [[ARRAYIDX_I]], align 4 +// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[ARRAYIDX6_I]] to i8* +// CHECK-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP26]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP36]], i8* align 8 [[TMP37]], i64 8, i1 false) +// CHECK-NEXT: store i32 33, i32* [[TMP30]], align 4 +// CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]] +// CHECK: .omp_outlined..1.exit: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup. +// CHECK-SAME: (%struct.kmp_task_t_with_privates* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP0]], %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP12]] to %struct.S* +// CHECK-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done3: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP15]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP17]], double noundef 0.000000e+00) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor. 
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done2: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK-SAME: () #[[ATTR9:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK-NEXT: call void 
@_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: 
[[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// CHECK-NEXT: store double [[ADD]], double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: store double [[TMP0]], double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// CHECK-SAME: (%struct.S.0* noundef nonnull 
align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: 
[[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.2* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 128 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 128 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: [[TMP19:%.*]] = bitcast [2 x i32]* [[TMP18]] to i8* +// CHECK-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 8, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP23]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP24]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 0) +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP26:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP25]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP27]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP28]], align 16 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[TMP29]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP32]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 +// CHECK-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: 
[[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry..5 +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store 
%struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 16 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 +// CHECK-NEXT: store i8* [[TMP21]], i8** 
[[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP27:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 +// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// CHECK: omp.inner.for.cond.i: +// CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP32]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] +// CHECK: omp.inner.for.body.i: +// CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !28 +// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP26]], align 128 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP27]], i64 0, i64 0 +// CHECK-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX_I]], align 4 +// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) +// CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]] +// CHECK: .omp_outlined..3.exit: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup..6 +// CHECK-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: 
[[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP12]] to %struct.S.0* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done3: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 3 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.0*, 
%struct.S.0** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor..7 +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done2: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = 
load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: store i32 0, i32* [[F]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// CHECK-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret void +// +// +// LAMBDA-LABEL: define {{[^@]+}}@main +// LAMBDA-SAME: () #[[ATTR0:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// LAMBDA-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// LAMBDA-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// LAMBDA-NEXT: ret i32 0 +// +// +// LAMBDA-LABEL: define {{[^@]+}}@.omp_outlined. 
+// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// LAMBDA-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// LAMBDA-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// LAMBDA-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// LAMBDA-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// LAMBDA-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) +// LAMBDA-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// LAMBDA-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// LAMBDA: omp_if.then: +// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// LAMBDA-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* +// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 +// LAMBDA-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 +// LAMBDA-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// LAMBDA-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 +// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 +// LAMBDA-NEXT: store i64 0, i64* [[TMP12]], align 8 +// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 +// LAMBDA-NEXT: store i64 9, i64* [[TMP13]], align 8 +// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 +// LAMBDA-NEXT: store i64 1, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 +// LAMBDA-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* +// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) +// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) +// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: br label 
[[OMP_IF_END]] +// LAMBDA: omp_if.end: +// LAMBDA-NEXT: ret void +// +// +// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_privates_map. +// LAMBDA-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR5:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 +// LAMBDA-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// LAMBDA-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// LAMBDA-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8 +// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// LAMBDA-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// LAMBDA-NEXT: ret void +// +// +// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_entry. +// LAMBDA-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// LAMBDA-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// LAMBDA-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// LAMBDA-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// LAMBDA-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// LAMBDA-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// LAMBDA-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 +// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// LAMBDA-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// LAMBDA-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// LAMBDA-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// LAMBDA-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// LAMBDA-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// LAMBDA-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// LAMBDA-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// LAMBDA-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// LAMBDA-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// LAMBDA-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// LAMBDA-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// LAMBDA-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// LAMBDA-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* +// LAMBDA-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] +// LAMBDA-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32 +// LAMBDA-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// LAMBDA: omp.inner.for.cond.i: +// LAMBDA-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// LAMBDA-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64 +// LAMBDA-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]] +// LAMBDA-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// LAMBDA: omp.inner.for.body.i: +// LAMBDA-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// LAMBDA-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14 +// LAMBDA-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8 +// LAMBDA-NEXT: store i32 11, i32* [[TMP27]], align 4 +// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 +// LAMBDA-NEXT: store double* [[TMP26]], double** [[TMP32]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 +// LAMBDA-NEXT: store i32* [[TMP27]], i32** [[TMP33]], align 8, !noalias !14 +// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) +// LAMBDA-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// LAMBDA-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1 +// LAMBDA-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I]] +// LAMBDA: .omp_outlined..1.exit: +// LAMBDA-NEXT: ret i32 0 +// +// +// BLOCKS-LABEL: define {{[^@]+}}@main +// BLOCKS-SAME: () #[[ATTR1:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: 
[[RETVAL:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// BLOCKS-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// BLOCKS-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)* +// BLOCKS-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) +// BLOCKS-NEXT: ret i32 0 +// +// +// BLOCKS-LABEL: define {{[^@]+}}@__main_block_invoke +// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* +// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 +// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@.omp_outlined. +// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// BLOCKS-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// BLOCKS-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// BLOCKS-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// BLOCKS-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// BLOCKS-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// BLOCKS-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// BLOCKS: omp_if.then: +// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// BLOCKS-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 +// BLOCKS-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 +// BLOCKS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// BLOCKS-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 +// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 +// BLOCKS-NEXT: store i64 0, i64* [[TMP12]], align 8 +// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 +// BLOCKS-NEXT: store i64 9, i64* [[TMP13]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 +// BLOCKS-NEXT: store i64 1, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* +// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) +// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) +// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: br label [[OMP_IF_END]] +// BLOCKS: omp_if.end: +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@_block_invoke +// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8 +// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* +// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 +// BLOCKS-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, 
%struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 +// BLOCKS-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8 +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_privates_map. +// BLOCKS-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR6:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 +// BLOCKS-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// BLOCKS-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// BLOCKS-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8 +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// BLOCKS-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_entry. +// BLOCKS-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// BLOCKS-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// BLOCKS-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// BLOCKS-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// BLOCKS-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 +// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[BLOCK_I:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 +// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// BLOCKS-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// BLOCKS-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 
+// BLOCKS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// BLOCKS-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// BLOCKS-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// BLOCKS-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// BLOCKS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// BLOCKS-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// BLOCKS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// BLOCKS-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* +// BLOCKS-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] +// BLOCKS-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32 +// BLOCKS-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// BLOCKS: omp.inner.for.cond.i: +// BLOCKS-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64 +// BLOCKS-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]] +// BLOCKS-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// BLOCKS: omp.inner.for.body.i: +// BLOCKS-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8 +// BLOCKS-NEXT: store i32 11, i32* [[TMP27]], align 4 +// BLOCKS-NEXT: [[BLOCK_ISA_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 0 +// BLOCKS-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[BLOCK_FLAGS_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 1 +// BLOCKS-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[BLOCK_RESERVED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 2 +// BLOCKS-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !14 +// BLOCKS-NEXT: [[BLOCK_INVOKE_I:%.*]] = 
getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 3 +// BLOCKS-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[BLOCK_DESCRIPTOR_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 4 +// BLOCKS-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[BLOCK_CAPTURED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 5 +// BLOCKS-NEXT: [[TMP32:%.*]] = load volatile double, double* [[TMP26]], align 8 +// BLOCKS-NEXT: store volatile double [[TMP32]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[BLOCK_CAPTURED3_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 6 +// BLOCKS-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP27]], align 4 +// BLOCKS-NEXT: store i32 [[TMP33]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP34:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]] to void ()* +// BLOCKS-NEXT: [[BLOCK_LITERAL_I:%.*]] = bitcast void ()* [[TMP34]] to %struct.__block_literal_generic* +// BLOCKS-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL_I]], i32 0, i32 3 +// BLOCKS-NEXT: [[TMP36:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL_I]] to i8* +// BLOCKS-NEXT: [[TMP37:%.*]] = load i8*, i8** [[TMP35]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to void (i8*)* +// BLOCKS-NEXT: call void [[TMP38]](i8* noundef [[TMP36]]) #[[ATTR4]] +// BLOCKS-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP39]], 1 +// BLOCKS-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I]] +// BLOCKS: .omp_outlined..1.exit: +// BLOCKS-NEXT: ret i32 0 +// +// +// ARRAY-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// ARRAY-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// ARRAY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// ARRAY-NEXT: [[TMP2:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float*, %struct.St*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float* [[TMP2]], %struct.St* [[TMP3]]) +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_outlined. +// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// ARRAY-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// ARRAY-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// ARRAY-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// ARRAY-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// ARRAY-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// ARRAY: omp_if.then: +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// ARRAY-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* +// ARRAY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store float* [[TMP14]], float** [[TMP13]], align 8 +// ARRAY-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP16:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP16]], %struct.St** [[TMP15]], align 8 +// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 +// ARRAY-NEXT: store i64 0, i64* [[TMP17]], align 8 +// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 +// ARRAY-NEXT: store i64 9, i64* [[TMP18]], align 8 +// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 +// ARRAY-NEXT: store i64 1, i64* [[TMP19]], align 8 +// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* +// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) +// ARRAY-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null) +// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: br label [[OMP_IF_END]] +// ARRAY: omp_if.end: +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// ARRAY-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], float*** noalias noundef [[TMP1:%.*]], %struct.St*** noalias noundef [[TMP2:%.*]]) #[[ATTR4:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca float***, align 8 +// ARRAY-NEXT: [[DOTADDR2:%.*]] = alloca %struct.St***, align 8 +// ARRAY-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// ARRAY-NEXT: store float*** [[TMP1]], float**** [[DOTADDR1]], align 8 +// ARRAY-NEXT: store %struct.St*** [[TMP2]], %struct.St**** [[DOTADDR2]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP5:%.*]] = load float***, float**** [[DOTADDR1]], align 8 +// ARRAY-NEXT: store float** [[TMP4]], float*** [[TMP5]], align 8 +// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP7:%.*]] = load %struct.St***, %struct.St**** [[DOTADDR2]], align 8 +// ARRAY-NEXT: store %struct.St** [[TMP6]], %struct.St*** [[TMP7]], align 8 +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_task_entry. +// ARRAY-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// ARRAY-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 +// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 +// ARRAY-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// ARRAY-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// ARRAY-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// 
ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// ARRAY-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// ARRAY-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// ARRAY-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// ARRAY-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// ARRAY-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// ARRAY-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// ARRAY-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 +// ARRAY-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* +// ARRAY-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] +// ARRAY-NEXT: [[TMP28:%.*]] = load float**, float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP29:%.*]] = load %struct.St**, %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 +// ARRAY-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// ARRAY: omp.inner.for.cond.i: +// ARRAY-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP31]] to i64 +// ARRAY-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP32]] +// ARRAY-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// ARRAY: omp.inner.for.body.i: +// ARRAY-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !14 +// ARRAY-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1 +// ARRAY-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I]] +// ARRAY: .omp_outlined..1.exit: +// ARRAY-NEXT: ret i32 0 +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@main +// SIMD-ONLY0-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, 
align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY0: for.cond: +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY0: for.body: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false) +// SIMD-ONLY0-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY0: for.inc: +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY0-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY0: for.end: +// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY0-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] 
= getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done2: +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define 
{{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY0-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY0: for.cond: +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY0: for.body: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 128 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 4, i1 false) +// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY0: for.inc: +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY0-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY0: for.end: +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done2: +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 
8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// 
SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// 
SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@main +// SIMD-ONLY1-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY1: for.cond: +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], 
align 4 +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY1: for.body: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false) +// SIMD-ONLY1-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY1: for.inc: +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY1-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY1: for.end: +// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done2: +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef 
nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY1-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds 
[[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY1: for.cond: +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY1: for.body: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 128 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 4, i1 false) +// SIMD-ONLY1-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY1: for.inc: +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY1-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY1: for.end: +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done2: +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* 
[[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* 
[[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// 
SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@main +// SIMD-ONLY2-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: 
[[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// SIMD-ONLY2-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY2-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// SIMD-ONLY2-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@main +// SIMD-ONLY3-SAME: () #[[ATTR1:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) +// SIMD-ONLY3-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// SIMD-ONLY3-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[BLOCK1:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY3: for.cond: +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +// SIMD-ONLY3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY3: for.body: +// SIMD-ONLY3-NEXT: store double 1.000000e+00, double* @g, align 8 +// SIMD-ONLY3-NEXT: store i32 11, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY3-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 0 +// SIMD-ONLY3-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 1 +// SIMD-ONLY3-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_RESERVED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 2 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[BLOCK_RESERVED]], align 4 +// SIMD-ONLY3-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i8*, 
i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: store i8* bitcast (void (i8*)* @__main_block_invoke_2 to i8*), i8** [[BLOCK_INVOKE]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 4 +// SIMD-ONLY3-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = load volatile double, double* @g, align 8 +// SIMD-ONLY3-NEXT: store volatile double [[TMP1]], double* [[BLOCK_CAPTURED]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY3-NEXT: store i32 [[TMP2]], i32* [[BLOCK_CAPTURED2]], align 8 +// SIMD-ONLY3-NEXT: [[TMP3:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]] to void ()* +// SIMD-ONLY3-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP3]] to %struct.__block_literal_generic* +// SIMD-ONLY3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: [[TMP5:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// SIMD-ONLY3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP4]], align 8 +// SIMD-ONLY3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP7]](i8* noundef [[TMP5]]) +// SIMD-ONLY3-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY3: for.inc: +// SIMD-ONLY3-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY3-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY3: for.end: +// SIMD-ONLY3-NEXT: ret void +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke_2 +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, 
i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8 +// SIMD-ONLY3-NEXT: ret void +// +// +// SIMD-ONLY4-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// SIMD-ONLY4-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY4-NEXT: entry: +// SIMD-ONLY4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// SIMD-ONLY4-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// SIMD-ONLY4-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// SIMD-ONLY4-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// SIMD-ONLY4-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY4-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY4: for.cond: +// SIMD-ONLY4-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY4: for.body: +// SIMD-ONLY4-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY4: for.inc: +// SIMD-ONLY4-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// SIMD-ONLY4-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY4: for.end: +// SIMD-ONLY4-NEXT: ret void +// diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp index f4ded3c7797e9..31e610d0ddffa 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s @@ -7,11 +8,10 @@ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck 
--check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY4 %s // expected-no-diagnostics #ifndef ARRAY @@ -30,15 +30,6 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } -// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } -// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] -// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]* } -// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } -// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } -// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } template T tmain() { S ttt; @@ -58,48 +49,14 @@ T tmain() { int main() { static int sivar; #ifdef LAMBDA - // LAMBDA: [[G:@.+]] ={{.*}} global double - // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // LAMBDA-LABEL: @main - // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( -// LAMBDA: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// LAMBDA-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// LAMBDA-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// LAMBDA: [[THEN]] -// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, -// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - -// LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[SIVAR_VAL:%.+]] = load 
i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, -// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) -// LAMBDA: call {{.*}}void @__kmpc_end_master( -// LAMBDA-NEXT: br label {{%?}}[[EXIT]] -// LAMBDA: [[EXIT]] -// LAMBDA: ret + + #pragma omp parallel master taskloop simd firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* {{[^,]*}} [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: store double* %{{.+}}, double** %{{.+}}, - // LAMBDA: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, - // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // LAMBDA: call void [[INNER_LAMBDA]](% - // LAMBDA: ret [&]() { g = 2; sivar = 22; @@ -108,51 +65,13 @@ int main() { }(); return 0; #elif defined(BLOCKS) - // BLOCKS: [[G:@.+]] ={{.*}} global double - // BLOCKS: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // BLOCKS-LABEL: @main - // BLOCKS: call void {{%.+}}(i8 ^{ - // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( - // BLOCKS-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 - // BLOCKS-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] - // BLOCKS: [[THEN]] - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) - // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 - // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, - // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - - // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, - // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) - // BLOCKS: call {{.*}}void @__kmpc_end_master( - // BLOCKS-NEXT: br label {{%?}}[[EXIT]] - // BLOCKS: [[EXIT]] - // BLOCKS: ret + #pragma omp parallel master taskloop simd firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // BLOCKS: define {{.+}} void {{@.+}}(i8* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store double 2.0{{.+}}, double* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: ret - - // BLOCKS: store double* %{{.+}}, double** %{{.+}}, - // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, - // 
BLOCKS: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: call void {{%.+}}(i8 ^{ g = 2; sivar = 22; @@ -177,303 +96,86 @@ int main() { #endif } -// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, -// CHECK: define{{.*}} i{{[0-9]+}} @main() -// CHECK: alloca [[S_DOUBLE_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], - -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* {{[^,]*}} [[TEST]], - -// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* %{{.+}}, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]], // Allocate task. // Returns struct kmp_task_t { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // Constructors for s_arr and var. 
// s_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: bitcast [2 x [[S_DOUBLE_TY]]]* %{{.+}} to [[S_DOUBLE_TY]]* -// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}, // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK-NEXT: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, -// CHECK-NEXT: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64( // sivar; -// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 -// CHECK-NEXT: [[SIVAR:%.+]] = load i{{.+}}, i{{.+}}* @{{.+}}, -// CHECK-NEXT: store i32 [[SIVAR]], i32* [[PRIVATE_SIVAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) -// CHECK: call {{.*}}void @__kmpc_end_master( -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias noundef %0, [[S_DOUBLE_TY]]** noalias noundef %1, i32** noalias noundef %2, [2 x [[S_DOUBLE_TY]]]** noalias noundef %3, [2 x i32]** noalias noundef %4, i32** noalias noundef %5) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, -// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], -// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 -// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, -// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_MAIN_TY]]* noalias noundef %1) - -// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, -// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, -// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], - -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) - -// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + + + + + // Privates actually are used. 
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] -// CHECK-DAG: [[PRIV_SIVAR]] - -// CHECK: ret - -// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* noundef %0, [[KMP_TASK_MAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_DOUBLE_TY]]* -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_DOUBLE_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %{{.+}}, [[KMP_TASK_MAIN_TY]]* noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 - -// CHECK: alloca [[S_INT_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], - -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[TEST]], + + + + + + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_INT_TY]]]* %{{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_INT_TY]]* %{{.+}}, [[S_INT_TY]]** [[VAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { // [[KMP_TASK_T_TY]] task_data; // [[KMP_TASK_TMAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, align 128 -// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], align 128 // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64( // Constructors for s_arr and var. // a_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: bitcast [2 x [[S_INT_TY]]]* %{{.+}} to [[S_INT_TY]]* -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias noundef %{{.+}}, i32** noalias noundef %{{.+}}, [2 x i32]** noalias noundef %{{.+}}, [2 x [[S_INT_TY]]]** noalias noundef %{{.+}}, [[S_INT_TY]]** noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, -// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: alloca i32*, -// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + + // Privates actually are used. 
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] - -// CHECK: ret - -// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* noundef %0, [[KMP_TASK_TMAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_INT_TY]]* -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_INT_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 + + + + + #endif #else -// ARRAY-LABEL: array_func struct St { int a, b; St() : a(0), b(0) {} @@ -482,13 +184,2309 @@ struct St { }; void array_func(int n, float a[n], St s[2]) { -// ARRAY: call i8* @__kmpc_omp_task_alloc( -// ARRAY: call void @__kmpc_taskloop( -// ARRAY: store float** %{{.+}}, float*** %{{.+}}, -// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, #pragma omp parallel master taskloop simd firstprivate(a, s) for (int i = 0; i < 10; ++i) ; } #endif +// CHECK-LABEL: define {{[^@]+}}@main +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) 
+// CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void 
@_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP19]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP2]], double noundef 0.000000e+00) +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP22]] to i8* +// CHECK-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 8, i1 false) +// CHECK-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 4 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// CHECK-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], %struct.S** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x %struct.S]** noalias noundef [[TMP3:%.*]], [2 x i32]** noalias noundef [[TMP4:%.*]], i32** noalias noundef [[TMP5:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.S**, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR5:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.S** [[TMP1]], %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S]** [[TMP3]], [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP4]], [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store i32** [[TMP5]], i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load [2 x %struct.S]**, [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP7]], [2 x %struct.S]** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = load %struct.S**, %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.S* [[TMP9]], %struct.S** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2 +// CHECK-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store i32* [[TMP11]], i32** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3 +// CHECK-NEXT: [[TMP14:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP13]], [2 x i32]** [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: store i32* [[TMP15]], i32** [[TMP16]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry. 
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP26:%.*]] = load %struct.S*, %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP29:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP31]] to i32 +// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// CHECK: omp.inner.for.cond.i: +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK-NEXT: [[TMP33:%.*]] = load 
i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP33]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// CHECK: omp.inner.for.body.i: +// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: store i32 [[TMP34]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP29]], i64 0, i64 0 +// CHECK-NEXT: store i32 [[TMP35]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[ARRAYIDX6_I]] to i8* +// CHECK-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP26]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP36]], i8* align 8 [[TMP37]], i64 8, i1 false), !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: store i32 33, i32* [[TMP30]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK: .omp_outlined..1.exit: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup. 
+// CHECK-SAME: (%struct.kmp_task_t_with_privates* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP0]], %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP12]] to %struct.S* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done3: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds 
[[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP15]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP17]], double noundef 0.000000e+00) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor. +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done2: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK-SAME: () #[[ATTR9:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: 
[[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, 
double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// CHECK-NEXT: store double [[ADD]], double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: store double [[TMP0]], double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* 
[[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.2* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], 
%struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 128 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 128 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: [[TMP19:%.*]] = bitcast [2 x i32]* [[TMP18]] to i8* +// CHECK-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 8, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP23]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP24]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 0) +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP26:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP25]] to 
i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP27]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP28]], align 16 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[TMP29]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP32]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 +// CHECK-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 
2 +// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry..5 +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], 
%struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 16 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 +// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP27:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: 
[[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 +// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// CHECK: omp.inner.for.cond.i: +// CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !32, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP32]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] +// CHECK: omp.inner.for.body.i: +// CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP26]], align 128, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP27]], i64 0, i64 0 +// CHECK-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false), !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]] +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK: .omp_outlined..3.exit: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup..6 +// CHECK-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 +// CHECK-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP12]] to %struct.S.0* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done3: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 3 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor..7 +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done2: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: store i32 0, i32* [[F]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: 
[[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// CHECK-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret void +// +// +// LAMBDA-LABEL: define {{[^@]+}}@main +// LAMBDA-SAME: () #[[ATTR0:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// LAMBDA-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// LAMBDA-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// LAMBDA-NEXT: ret i32 0 +// +// +// LAMBDA-LABEL: define {{[^@]+}}@.omp_outlined. 
+// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// LAMBDA-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// LAMBDA-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// LAMBDA-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// LAMBDA-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// LAMBDA-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) +// LAMBDA-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// LAMBDA-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// LAMBDA: omp_if.then: +// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// LAMBDA-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* +// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 +// LAMBDA-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 +// LAMBDA-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// LAMBDA-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 +// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 +// LAMBDA-NEXT: store i64 0, i64* [[TMP12]], align 8 +// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 +// LAMBDA-NEXT: store i64 9, i64* [[TMP13]], align 8 +// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 +// LAMBDA-NEXT: store i64 1, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 +// LAMBDA-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* +// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) +// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) +// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: br label 
[[OMP_IF_END]] +// LAMBDA: omp_if.end: +// LAMBDA-NEXT: ret void +// +// +// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_privates_map. +// LAMBDA-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR5:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 +// LAMBDA-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// LAMBDA-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// LAMBDA-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8 +// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// LAMBDA-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// LAMBDA-NEXT: ret void +// +// +// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_entry. +// LAMBDA-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// LAMBDA-NEXT: entry: +// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// LAMBDA-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// LAMBDA-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// LAMBDA-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// LAMBDA-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// LAMBDA-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// LAMBDA-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 +// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// LAMBDA-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// LAMBDA-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// LAMBDA-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// LAMBDA-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// LAMBDA-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// LAMBDA-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// LAMBDA-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// LAMBDA-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// LAMBDA-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// LAMBDA-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// LAMBDA-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// LAMBDA-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// LAMBDA-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// LAMBDA-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* +// LAMBDA-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] +// LAMBDA-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32 +// LAMBDA-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// LAMBDA: omp.inner.for.cond.i: +// LAMBDA-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// LAMBDA-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64 +// LAMBDA-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]] +// LAMBDA-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// LAMBDA: omp.inner.for.body.i: +// LAMBDA-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: store i32 11, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 +// LAMBDA-NEXT: store double* [[TMP26]], double** [[TMP32]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 +// LAMBDA-NEXT: store i32* [[TMP27]], i32** [[TMP33]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]), !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: [[ADD3_I:%.*]] = add 
nsw i32 [[TMP34]], 1 +// LAMBDA-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// LAMBDA: .omp_outlined..1.exit: +// LAMBDA-NEXT: ret i32 0 +// +// +// BLOCKS-LABEL: define {{[^@]+}}@main +// BLOCKS-SAME: () #[[ATTR1:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// BLOCKS-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// BLOCKS-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)* +// BLOCKS-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) +// BLOCKS-NEXT: ret i32 0 +// +// +// BLOCKS-LABEL: define {{[^@]+}}@__main_block_invoke +// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* +// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 +// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@.omp_outlined. +// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// BLOCKS-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// BLOCKS-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// BLOCKS-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// BLOCKS-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// BLOCKS-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// BLOCKS-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// BLOCKS: omp_if.then: +// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// BLOCKS-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 +// BLOCKS-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 +// BLOCKS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// BLOCKS-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 +// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 +// BLOCKS-NEXT: store i64 0, i64* [[TMP12]], align 8 +// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 +// BLOCKS-NEXT: store i64 9, i64* [[TMP13]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 +// BLOCKS-NEXT: store i64 1, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* +// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) +// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) +// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: br label [[OMP_IF_END]] +// BLOCKS: omp_if.end: +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@_block_invoke +// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8 +// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* +// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 +// BLOCKS-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8 +// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, 
%struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 +// BLOCKS-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8 +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_privates_map. +// BLOCKS-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR6:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8 +// BLOCKS-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// BLOCKS-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// BLOCKS-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8 +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// BLOCKS-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// BLOCKS-NEXT: ret void +// +// +// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_entry. +// BLOCKS-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// BLOCKS-NEXT: entry: +// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// BLOCKS-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// BLOCKS-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// BLOCKS-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// BLOCKS-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 +// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// BLOCKS-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[BLOCK_I:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 +// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// BLOCKS-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// BLOCKS-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 
+// BLOCKS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// BLOCKS-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// BLOCKS-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// BLOCKS-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// BLOCKS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// BLOCKS-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// BLOCKS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// BLOCKS-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* +// BLOCKS-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] +// BLOCKS-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32 +// BLOCKS-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// BLOCKS: omp.inner.for.cond.i: +// BLOCKS-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// BLOCKS-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64 +// BLOCKS-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]] +// BLOCKS-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// BLOCKS: omp.inner.for.body.i: +// BLOCKS-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store i32 11, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_ISA_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 0 +// BLOCKS-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_FLAGS_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 1 +// BLOCKS-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_RESERVED_I:%.*]] = getelementptr inbounds <{ i8*, 
i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 2 +// BLOCKS-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_INVOKE_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 3 +// BLOCKS-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_DESCRIPTOR_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 4 +// BLOCKS-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_CAPTURED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 5 +// BLOCKS-NEXT: [[TMP32:%.*]] = load volatile double, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store volatile double [[TMP32]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_CAPTURED3_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 6 +// BLOCKS-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store i32 [[TMP33]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[TMP34:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]] to void ()* +// BLOCKS-NEXT: [[BLOCK_LITERAL_I:%.*]] = bitcast void ()* [[TMP34]] to %struct.__block_literal_generic* +// BLOCKS-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL_I]], i32 0, i32 3 +// BLOCKS-NEXT: [[TMP36:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL_I]] to i8* +// BLOCKS-NEXT: [[TMP37:%.*]] = load i8*, i8** [[TMP35]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to void (i8*)* +// BLOCKS-NEXT: call void [[TMP38]](i8* noundef [[TMP36]]) #[[ATTR4]], !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP39]], 1 +// BLOCKS-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// BLOCKS: .omp_outlined..1.exit: +// BLOCKS-NEXT: ret i32 0 +// +// +// ARRAY-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// ARRAY-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 
+// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// ARRAY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// ARRAY-NEXT: [[TMP2:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float*, %struct.St*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float* [[TMP2]], %struct.St* [[TMP3]]) +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_outlined. +// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// ARRAY-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// ARRAY-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// ARRAY-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// ARRAY-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// ARRAY-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// ARRAY: omp_if.then: +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// ARRAY-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* +// ARRAY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store float* [[TMP14]], float** [[TMP13]], align 8 +// ARRAY-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP16:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP16]], %struct.St** [[TMP15]], align 8 +// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 +// ARRAY-NEXT: store i64 0, i64* [[TMP17]], align 8 +// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 +// ARRAY-NEXT: store i64 9, i64* [[TMP18]], align 8 +// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 +// ARRAY-NEXT: store i64 1, i64* [[TMP19]], align 8 +// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* +// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) +// ARRAY-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null) +// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: br label [[OMP_IF_END]] +// ARRAY: omp_if.end: +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// ARRAY-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], float*** noalias noundef [[TMP1:%.*]], %struct.St*** noalias noundef [[TMP2:%.*]]) #[[ATTR4:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca float***, align 8 +// ARRAY-NEXT: [[DOTADDR2:%.*]] = alloca %struct.St***, align 8 +// ARRAY-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// ARRAY-NEXT: store float*** [[TMP1]], float**** [[DOTADDR1]], align 8 +// ARRAY-NEXT: store %struct.St*** [[TMP2]], %struct.St**** [[DOTADDR2]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP5:%.*]] = load float***, float**** [[DOTADDR1]], align 8 +// ARRAY-NEXT: store float** [[TMP4]], float*** [[TMP5]], align 8 +// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP7:%.*]] = load %struct.St***, %struct.St**** [[DOTADDR2]], align 8 +// ARRAY-NEXT: store %struct.St** [[TMP6]], %struct.St*** [[TMP7]], align 8 +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_task_entry. +// ARRAY-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// ARRAY-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 +// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 +// ARRAY-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// ARRAY-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// ARRAY-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// 
ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// ARRAY-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// ARRAY-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// ARRAY-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// ARRAY-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// ARRAY-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// ARRAY-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// ARRAY-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 +// ARRAY-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* +// ARRAY-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] +// ARRAY-NEXT: [[TMP28:%.*]] = load float**, float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP29:%.*]] = load %struct.St**, %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 +// ARRAY-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// ARRAY: omp.inner.for.cond.i: +// ARRAY-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// ARRAY-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP31]] to i64 +// ARRAY-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP32]] +// ARRAY-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// ARRAY: omp.inner.for.body.i: +// ARRAY-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1 +// ARRAY-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// ARRAY: .omp_outlined..1.exit: +// ARRAY-NEXT: ret i32 0 +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@main +// SIMD-ONLY0-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, 
align 4 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY0-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY0-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY0: omp.inner.for.cond: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY0: omp.inner.for.body: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = bitcast 
%struct.S* [[ARRAYIDX2]] to i8* +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 8 [[TMP7]], i64 8, i1 false), !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY0: omp.body.continue: +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY0: omp.inner.for.inc: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY0: omp.inner.for.end: +// SIMD-ONLY0-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY0-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done4: +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = 
alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY0-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], 
%struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY0-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY0: omp.inner.for.cond: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY0: omp.inner.for.body: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 128, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY0: omp.body.continue: +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY0: omp.inner.for.inc: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY0: omp.inner.for.end: +// SIMD-ONLY0-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done4: +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], 
%struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca 
%struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca 
%struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@main +// SIMD-ONLY1-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to 
i32 +// SIMD-ONLY1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY1: omp.inner.for.cond: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY1-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY1: omp.inner.for.body: +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 8 [[TMP7]], i64 8, i1 false), !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY1: omp.body.continue: +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY1: omp.inner.for.inc: +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY1: omp.inner.for.end: +// SIMD-ONLY1-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done4: +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY1-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca 
i32, align 128 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// SIMD-ONLY1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY1: omp.inner.for.cond: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// SIMD-ONLY1-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY1: omp.inner.for.body: +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 128, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = 
bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY1: omp.body.continue: +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY1: omp.inner.for.inc: +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY1: omp.inner.for.end: +// SIMD-ONLY1-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done4: +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// 
SIMD-ONLY1-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define 
{{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) 
unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@main +// SIMD-ONLY2-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// SIMD-ONLY2-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY2-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// SIMD-ONLY2-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@main +// SIMD-ONLY3-SAME: () #[[ATTR1:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, 
i32 3), align 8 +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) +// SIMD-ONLY3-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// SIMD-ONLY3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY3-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// SIMD-ONLY3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY3: omp.inner.for.cond: +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY3-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// SIMD-ONLY3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP2]] +// SIMD-ONLY3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY3: omp.inner.for.body: +// SIMD-ONLY3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// SIMD-ONLY3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store double 1.000000e+00, double* @g, align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store i32 11, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 0 +// SIMD-ONLY3-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 1 +// SIMD-ONLY3-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS]], align 8, 
!llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_RESERVED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 2 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[BLOCK_RESERVED]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: store i8* bitcast (void (i8*)* @__main_block_invoke_2 to i8*), i8** [[BLOCK_INVOKE]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 4 +// SIMD-ONLY3-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: [[TMP4:%.*]] = load volatile double, double* @g, align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store volatile double [[TMP4]], double* [[BLOCK_CAPTURED]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: [[TMP5:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store i32 [[TMP5]], i32* [[BLOCK_CAPTURED3]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[TMP6:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]] to void ()* +// SIMD-ONLY3-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP6]] to %struct.__block_literal_generic* +// SIMD-ONLY3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: [[TMP8:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// SIMD-ONLY3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP7]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP10]](i8* noundef [[TMP8]]), !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY3: omp.body.continue: +// SIMD-ONLY3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY3: omp.inner.for.inc: +// SIMD-ONLY3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// SIMD-ONLY3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY3: omp.inner.for.end: +// SIMD-ONLY3-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: ret 
void +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke_2 +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8 +// SIMD-ONLY3-NEXT: ret void +// +// +// SIMD-ONLY4-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// SIMD-ONLY4-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY4-NEXT: entry: +// SIMD-ONLY4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// SIMD-ONLY4-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// SIMD-ONLY4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY4-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// SIMD-ONLY4-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// SIMD-ONLY4-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY4-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY4-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY4-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 +// SIMD-ONLY4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY4: omp.inner.for.cond: +// SIMD-ONLY4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY4-NEXT: [[CONV1:%.*]] = sext i32 [[TMP3]] to i64 +// SIMD-ONLY4-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY4-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP4]] +// SIMD-ONLY4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY4: omp.inner.for.body: +// SIMD-ONLY4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// 
SIMD-ONLY4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1 +// SIMD-ONLY4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY4: omp.body.continue: +// SIMD-ONLY4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY4: omp.inner.for.inc: +// SIMD-ONLY4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 +// SIMD-ONLY4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY4: omp.inner.for.end: +// SIMD-ONLY4-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp index 4f8f5ae37472c..05a6e891204f2 100644 --- a/clang/test/OpenMP/target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp @@ -1,38 +1,37 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test host codegen. -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK0 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK1 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK2 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK3 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x 
c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s // Test target codegen - host bc file has to be created first. 
// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK1 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK2 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK3 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple 
powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY12 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s -// SIMD-ONLY1-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s // expected-no-diagnostics #ifndef HEADER @@ -47,28 +46,9 @@ struct TT { int ga = 5; #pragma omp end declare target -// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } -// CHECK-DAG: [[TTII:%.+]] = type { i32, i32 } -// CHECK-DAG: [[S1:%.+]] = type { double } - -// TCHECK-DAG: [[TT:%.+]] = type { i64, i8 } -// TCHECK-DAG: [[TTII:%.+]] = type { i32, i32 } -// TCHECK-DAG: [[S1:%.+]] = type { double } - -// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ:32|64]] 4, i{{64|32}} {{8|4}}, i[[SZ:32|64]] 4] -// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 49, i64 288] -// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [9 x i64] [i64 2, i64 40, i64 {{4|8}}, i64 0, i64 400, i64 {{4|8}}, i64 {{4|8}}, i64 0, i64 {{12|16}}] -// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 161, i64 800, i64 161, i64 161, i64 800, i64 800, i64 161, i64 161] -// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i{{32|64}} 0, i{{32|64}} 8] -// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 161] -// CHECK-DAG: [[SIZET4:@.+]] = private unnamed_addr constant [5 x i64] [i64 8, i64 4, i64 {{4|8}}, i64 {{4|8}}, i64 0] -// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr 
constant [5 x i64] [i64 547, i64 288, i64 800, i64 800, i64 161] -// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 161] -// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 161] - -// CHECK: define {{.*}}[[FOO:@.+]]( + + + int foo(int n, double *ptr) { int a = 0; short aa = 0; @@ -85,75 +65,7 @@ int foo(int n, double *ptr) { } // a is passed by value to tgt_target - // CHECK: [[N_ADDR:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[PTR_ADDR:%.+]] = alloca double*, - // CHECK: [[A:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[B:%.+]] = alloca [10 x float], - // CHECK: [[SSTACK:%.+]] = alloca i8*, - // CHECK: [[C:%.+]] = alloca [5 x [10 x double]], - // CHECK: [[D:%.+]] = alloca [[TT]], - // CHECK: [[FP_E:%.+]] = alloca [[TTII]], - // CHECK: [[P:%.+]] = alloca i32*, align 64 - // CHECK: [[ACAST:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[BASE_PTR_ARR:%.+]] = alloca [3 x i8*], - // CHECK: [[PTR_ARR:%.+]] = alloca [3 x i8*], - // CHECK: [[A2CAST:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[BASE_PTR_ARR2:%.+]] = alloca [9 x i8*], - // CHECK: [[PTR_ARR2:%.+]] = alloca [9 x i8*], - // CHECK: [[SIZET2:%.+]] = alloca [9 x i{{[0-9]+}}], - // CHECK: [[BASE_PTR_ARR3:%.+]] = alloca [2 x i8*], - // CHECK: [[PTR_ARR3:%.+]] = alloca [2 x i8*], - // CHECK: [[N_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[N_ADDR]], - // CHECK-64: [[N_EXT:%.+]] = zext i{{[0-9]+}} [[N_ADDR_VAL]] to i{{[0-9]+}} - // CHECK: [[SSAVE_RET:%.+]] = call i8* @llvm.stacksave() - // CHECK: store i8* [[SSAVE_RET]], i8** [[SSTACK]], - // CHECK-64: [[BN_VLA:%.+]] = alloca float, i{{[0-9]+}} [[N_EXT]], - // CHECK-32: [[BN_VLA:%.+]] = alloca float, i{{[0-9]+}} [[N_ADDR_VAL]], - // CHECK: [[N_ADDR_VAL2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[N_ADDR]], - // CHECK-64: [[N_EXT2:%.+]] = zext i{{[0-9]+}} [[N_ADDR_VAL2]] to i{{[0-9]+}} - // CHECK-64: [[CN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_EXT2]] - // CHECK-32: [[CN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_ADDR_VAL2]] - // CHECK: [[CN_VLA:%.+]] = alloca double, i{{[0-9]+}} [[CN_SIZE]], - // CHECK: [[AVAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A]], - // CHECK-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ACAST]] to i{{[0-9]+}}* - // CHECK-64: store i{{[0-9]+}} [[AVAL]], i{{[0-9]+}}* [[CONV]], - // CHECK-32: store i{{[0-9]+}} [[AVAL]], i{{[0-9]+}}* [[ACAST]], - // CHECK: [[ACAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ACAST]], - // CHECK: [[P_PTR:%.+]] = load i32*, i32** [[P]], align 64 - // CHECK: [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[ACAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]], - // CHECK: [[PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR2:%.+]] = bitcast i8** [[PTR_GEP]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[ACAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR2]], - // CHECK: [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[PCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP]] to i32*** - // CHECK: store i32** [[P]], i32*** 
[[PCAST_TOPTR]], - // CHECK: [[PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[PCAST_TOPTR2:%.+]] = bitcast i8** [[PTR_GEP]] to i32** - // CHECK: store i32* [[P_PTR]], i32** [[PCAST_TOPTR2]], - // CHECK: [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[PCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP]] to i{{64|32}}* - // CHECK: store i{{64|32}} [[GA_VAL:%.*]], i{{64|32}}* [[PCAST_TOPTR]], - // CHECK: [[PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[PCAST_TOPTR2:%.+]] = bitcast i8** [[PTR_GEP]] to i{{64|32}}* - // CHECK: store i{{64|32}} [[GA_VAL]], i{{64|32}}* [[PCAST_TOPTR2]], - // CHECK: [[BASE_PTR_GEP_ARG:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) - - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i32** noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]], i{{[0-9]+}} noundef [[GA_IN:%.+]]) - // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[P_ADDR:%.+]] = alloca i32**, - // TCHECK: [[GA_ADDR:%.+]] = alloca i{{64|32}}, - // TCHECK: [[P_PRIV:%.+]] = alloca i32*, - // TCHECK-NOT: alloca i{{[0-9]+}} - // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], - // TCHECK: store i32** [[P_IN]], i32*** [[P_ADDR]], - // TCHECK: store i{{[0-9]+}} [[GA_IN]], i{{[0-9]+}}* [[GA_ADDR]], - // TCHECK-NOT: store i{{[0-9]+}} % - // TCHECK: ret void + #pragma omp target firstprivate(aa, b, bn, c, cn, d) { @@ -166,198 +78,44 @@ int foo(int n, double *ptr) { d.Y = 1; } - // CHECK: [[A2VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A2]], - // CHECK: [[A2CASTCONV:%.+]] = bitcast i{{[0-9]+}}* [[A2CAST]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A2VAL]], i{{[0-9]+}}* [[A2CASTCONV]], - // CHECK: [[A2CAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A2CAST]], - // CHECK-64: [[BN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[N_EXT]], 4 - // CHECK-32: [[BN_SZ_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[N_ADDR_VAL]], 4 - // CHECK-32: [[BN_SIZE:%.+]] = sext i32 [[BN_SZ_SIZE]] to i64 - // CHECK-64: [[CN_SIZE_1:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_EXT2]] - // CHECK-32: [[CN_SIZE_1:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_ADDR_VAL2]] - // CHECK-64: [[CN_SIZE_2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SIZE_1]], 8 - // CHECK-32: [[CN_SZ_SIZE_2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SIZE_1]], 8 - // CHECK-32: [[CN_SIZE_2:%.+]] = sext i32 [[CN_SZ_SIZE_2]] to i64 // firstprivate(aa) --> base_ptr = aa, ptr = aa, size = 2 (short) - // CHECK: [[BASE_PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_0]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A2CAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]], - // CHECK: [[PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_0]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A2CAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]], // 
firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10)
- // CHECK: [[BASE_PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_1]] to [10 x float]**
- // CHECK: store [10 x float]* [[B]], [10 x float]** [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_1]] to [10 x float]**
- // CHECK: store [10 x float]* [[B]], [10 x float]** [[BCAST_TOPTR]],
 // firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8 ; (2) base_ptr = &bn[0], ptr = &bn[0], size = n*sizeof(float)
- // CHECK: [[BASE_PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_2]] to i{{[0-9]+}}*
- // CHECK-64: store i{{[0-9]+}} [[N_EXT]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_2]] to i{{[0-9]+}}*
- // CHECK-64: store i{{[0-9]+}} [[N_EXT]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[BASE_PTR_GEP2_3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_3]] to float**
- // CHECK: store float* [[BN_VLA]], float** [[BCAST_TOPTR]],
- // CHECK: [[SIZE_GEPBN_3:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // CHECK: store i{{[0-9]+}} [[BN_SIZE]], i{{[0-9]+}}* [[SIZE_GEPBN_3]]
 // firstprivate(c): base_ptr = &c[0], ptr = &c[0], size = 400 (5*10*sizeof(double))
- // CHECK: [[BASE_PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_4]] to [5 x [10 x double]]**
- // CHECK: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_4]] to [5 x [10 x double]]**
- // CHECK: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[BCAST_TOPTR]],
 // firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double)
- // CHECK: [[BASE_PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_5]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} 5, i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_5]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} 5, i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[BASE_PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_6]] to i{{[0-9]+}}*
- // CHECK-64: store i{{[0-9]+}} [[N_EXT2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_6]] to i{{[0-9]+}}*
- // CHECK-64: store i{{[0-9]+}} [[N_EXT2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[BASE_PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_7]] to double**
- // CHECK: store double* [[CN_VLA]], double** [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_7]] to double**
- // CHECK: store double* [[CN_VLA]], double** [[BCAST_TOPTR]],
- // CHECK: [[SIZE_GEPCN_7:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
- // CHECK: store i{{[0-9]+}} [[CN_SIZE_2]], i{{[0-9]+}}* [[SIZE_GEPCN_7]],
 // firstprivate(d): base_ptr = &d, ptr = &d, size = 16
- // CHECK: [[BASE_PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_8]] to [[TT]]**
- // CHECK: store [[TT]]* [[D]], [[TT]]** [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_8]] to [[TT]]**
- // CHECK: store [[TT]]* [[D]], [[TT]]** [[BCAST_TOPTR]],
-
- // CHECK: [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[SIZES_ARG2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
+ // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the
 // target region
- // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} noundef [[BN_SZ:%.+]], float* {{.+}} [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} noundef [[CN_SZ1:%.+]], i{{[0-9]+}} noundef [[CN_SZ2:%.+]], double* {{.+}} [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]])
- // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*,
- // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[BN_ADDR:%.+]] = alloca float*,
- // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*,
- // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[CN_ADDR:%.+]] = alloca double*,
- // TCHECK: [[D_ADDR:%.+]] = alloca [[TT]]*,
- // TCHECK-NOT: alloca i{{[0-9]+}},
- // TCHECK: [[B_PRIV:%.+]] = alloca [10 x float],
- // TCHECK: [[SSTACK:%.+]] = alloca i8*,
- // TCHECK: [[C_PRIV:%.+]] = alloca [5 x [10 x double]],
- // TCHECK: [[D_PRIV:%.+]] = alloca [[TT]],
- // TCHECK: store i{{[0-9]+}} [[A2_IN]], i{{[0-9]+}}* [[A2_ADDR]],
- // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]],
- // TCHECK: store i{{[0-9]+}} [[BN_SZ]], i{{[0-9]+}}* [[VLA_ADDR]],
- // TCHECK: store float* [[BN_IN]], float** [[BN_ADDR]],
- // TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]],
- // TCHECK: store i{{[0-9]+}} [[CN_SZ1]], i{{[0-9]+}}* [[VLA_ADDR2]],
- // TCHECK: store i{{[0-9]+}} [[CN_SZ2]], i{{[0-9]+}}* [[VLA_ADDR4]],
- // TCHECK: store double* [[CN_IN]], double** [[CN_ADDR]],
- // TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]],
- // TCHECK: [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}*
- // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]],
- // TCHECK: [[BN_SZ_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
- // TCHECK: [[BN_ADDR_REF:%.+]] = load float*, float** [[BN_ADDR]],
- // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]],
- // TCHECK: [[CN_SZ1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
- // TCHECK: [[CN_SZ2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR4]],
- // TCHECK: [[CN_ADDR_REF:%.+]] = load double*, double** [[CN_ADDR]],
- // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]],
 // firstprivate(aa): a_priv = a_in
- // TCHECK-NOT: store i{{[0-9]+}} %
 // firstprivate(b): memcpy(b_priv,b_in)
- // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8*
- // TCHECK: [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8*
- // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}})
- // TCHECK: [[RET_STACK:%.+]] = call i8* @llvm.stacksave()
- // TCHECK: store i8* [[RET_STACK]], i8** [[SSTACK]],
 // firstprivate(bn)
- // TCHECK: [[BN_PRIV:%.+]] = alloca float, i{{[0-9]+}} [[BN_SZ_VAL]],
- // TCHECK: [[BN_COPY_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[BN_SZ_VAL]], 4
- // TCHECK: [[BN_PRIV__BCAST:%.+]] = bitcast float* [[BN_PRIV]] to i8*
- // TCHECK: [[BN_REF_IN_BCAST:%.+]] = bitcast float* [[BN_ADDR_REF]] to i8*
- // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BN_PRIV__BCAST]], i8* align {{[0-9]+}} [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}})
 // firstprivate(c)
- // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8*
- // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8*
- // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}})
 // firstprivate(cn)
- // TCHECK: [[CN_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]]
- // TCHECK: [[CN_PRIV:%.+]] = alloca double, i{{[0-9]+}} [[CN_SZ]],
- // TCHECK: [[CN_SZ2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]]
- // TCHECK: [[CN_SZ2_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ2]], 8
- // TCHECK: [[CN_PRIV_BCAST:%.+]] = bitcast double* [[CN_PRIV]] to i8*
- // TCHECK: [[CN_IN_BCAST:%.+]] = bitcast double* [[CN_ADDR_REF]] to i8*
- // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[CN_PRIV_BCAST]], i8* align {{[0-9]+}} [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}})
 // firstprivate(d)
- // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8*
- // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8*
- // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}})
 #pragma omp target firstprivate(ptr, e)
   {
     ptr[0] = e.X;
     ptr[0]++;
   }
- // CHECK: [[PTR_ADDR_REF:%.+]] = load double*, double** [[PTR_ADDR]],
-
- // CHECK: [[BASE_PTR_GEP3_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP3_0]] to double**
- // CHECK: store double* [[PTR_ADDR_REF]], double** [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP3_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP3_0]] to double**
- // CHECK: store double* [[PTR_ADDR_REF]], double** [[BCAST_TOPTR]],
- // CHECK: [[BASE_PTR_GEP3_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP3_1]] to [[TTII]]**
- // CHECK: store [[TTII]]* [[FP_E]], [[TTII]]** [[BCAST_TOPTR]],
- // CHECK: [[PTR_GEP3_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP3_1]] to [[TTII]]**
- // CHECK: store [[TTII]]* [[FP_E]], [[TTII]]** [[BCAST_TOPTR]],
-
- // CHECK: [[BASE_PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
-
- // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]], [[TTII]]* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]])
- // TCHECK: [[PTR_ADDR:%.+]] = alloca double*,
- // TCHECK-NOT: alloca [[TTII]],
- // TCHECK-NOT: alloca double*,
- // TCHECK: store double* [[PTR_IN]], double** [[PTR_ADDR]],
- // TCHECK-NOT: store double* %
+
+
+
   return a;
 }
@@ -391,30 +149,13 @@ static int fstatic(int n) {
   return a;
 }
-// TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i{{[0-9]+}} noundef [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
-// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
-// TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}},
-// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
-// TCHECK-NOT: alloca i{{[0-9]+}},
-// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
-// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
-// TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]],
-// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
-// TCHECK-64: [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
-// TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8*
-// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
 // firstprivate(a): a_priv = a_in
 // firstprivate(aaa)
-// TCHECK-NOT: store i{{[0-9]+}} %
 // firstprivate(b)
-// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
-// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
-// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}})
-// TCHECK: ret void
 struct S1 {
   double a;
@@ -433,123 +174,33 @@ struct S1 {
   }
 // on the host side, we first generate r1, then the static function and the template above
- // CHECK: define{{.+}} i32 {{.+}}([[S1]]* {{.+}}, i{{[0-9]+}} {{.+}})
- // CHECK: [[BASE_PTRS4:%.+]] = alloca [5 x i8*],
- // CHECK: [[PTRS4:%.+]] = alloca [5 x i8*],
- // CHECK: [[SIZET4:%.+]] = alloca [5 x i{{[0-9]+}}],
 // map(this): this ptr is implicitly captured (not a firstprivate matter)
- // CHECK: [[BP0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[CBP0:%.+]] = bitcast i8** [[BP0]] to %struct.S1**
- // CHECK: store %struct.S1* [[THIS:%.+]], %struct.S1** [[CBP0]],
- // CHECK: [[P0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[CP0:%.+]] = bitcast i8** [[P0]] to double**
- // CHECK: store double* [[A:%.+]], double** [[CP0]],
 // firstprivate(b): base_ptr = b, ptr = b, size = 4 (pass by-value)
- // CHECK: [[BASE_PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_1]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[B_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_1]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[B_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]],
 // firstprivate(c), 3 entries: 2, n, c
- // CHECK: [[BASE_PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_2]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_2]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[BASE_PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_3]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[N:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_3]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[N]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[BASE_PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_4]] to i{{[0-9]+}}**
- // CHECK: store i{{[0-9]+}}* [[B:%.+]], i{{[0-9]+}}** [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_4]] to i{{[0-9]+}}**
- // CHECK: store i{{[0-9]+}}* [[B]], i{{[0-9]+}}** [[BCAST_TOPTR]],
- // CHECK: [[SIZES_GEP4_4:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
- // CHECK: store i{{[0-9]+}} [[B_SIZE:%.+]], i{{[0-9]+}}* [[SIZES_GEP4_4]],
 // only check that we use the map types stored in the global variable
- // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
-
- // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]])
- // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
- // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
- // TCHECK: [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*,
- // TCHECK-NOT: alloca i{{[0-9]+}},
- // TCHECK: [[SSTACK:%.+]] = alloca i8*,
-
- // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]],
- // TCHECK: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[B_ADDR]],
- // TCHECK: store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]],
- // TCHECK: store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]],
- // TCHECK: store i{{[0-9]+}}* [[C_IN]], i{{[0-9]+}}** [[C_ADDR]],
- // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]],
- // TCHECK-64: [[B_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[B_ADDR]] to i{{[0-9]+}}*
- // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
- // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
- // TCHECK: [[C_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]],
+
+
 // firstprivate(b)
- // TCHECK-NOT: store i{{[0-9]+}} %
- // TCHECK: [[RET_STACK:%.+]] = call i8* @llvm.stacksave()
- // TCHECK: store i8* [[RET_STACK:%.+]], i8** [[SSTACK]],
 // firstprivate(c)
- // TCHECK: [[C_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]]
- // TCHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, i{{[0-9]+}} [[C_SZ]],
- // TCHECK: [[C_SZ2:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]]
- // TCHECK: [[C_SZ_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[C_SZ2]], 2
- // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_PRIV]] to i8*
- // TCHECK: [[C_IN_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_ADDR_REF]] to i8*
- // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}})
 // finish
- // TCHECK: [[RELOAD_SSTACK:%.+]] = load i8*, i8** [[SSTACK]],
- // TCHECK: call void @llvm.stackrestore(i8* [[RELOAD_SSTACK]])
- // TCHECK: ret void
 // static host function
- // CHECK: define{{.+}} i32 {{.+}}(i{{[0-9]+}} {{.+}})
- // CHECK: [[BASE_PTRS5:%.+]] = alloca [3 x i8*],
- // CHECK: [[PTRS5:%.+]] = alloca [3 x i8*],
 // firstprivate(a): by value
- // CHECK: [[BASE_PTRS_GEP5_0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP5_0]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[A_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP5_0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP5_0]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[A_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]],
 // firstprivate(aaa): by value
- // CHECK: [[BASE_PTRS_GEP5_1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP5_1]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[A3_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP5_1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP5_1]] to i{{[0-9]+}}*
- // CHECK: store i{{[0-9]+}} [[A3_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]],
 // firstprivate(b): base_ptr = &b[0], ptr = &b[0]
- // CHECK: [[BASE_PTRS_GEP5_2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP5_2]] to [10 x i{{[0-9]+}}]**
- // CHECK: store [10 x i{{[0-9]+}}]* [[B:%.+]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
- // CHECK: [[PTRS_GEP5_2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
- // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP5_2]] to [10 x i{{[0-9]+}}]**
- // CHECK: store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
 // only check that the right sizes and map types are used
- // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
 };
 int bar(int n, double *ptr) {
@@ -565,46 +216,11786 @@ int bar(int n, double *ptr) {
 // template host and device
-// CHECK: define{{.+}} i32 {{.+}}(i{{[0-9]+}} {{.+}})
-// CHECK: [[BASE_PTRS6:%.+]] = alloca [2 x i8*],
-// CHECK: [[PTRS6:%.+]] = alloca [2 x i8*],
 // firstprivate(a): by value
-// CHECK: [[BASE_PTRS_GEP6_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP6_0]] to i{{[0-9]+}}*
-// CHECK: store i{{[0-9]+}} [[AT_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]],
-// CHECK: [[PTRS_GEP6_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_0]] to i{{[0-9]+}}*
-// CHECK: store i{{[0-9]+}} [[AT_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]],
 // firstprivate(b): pointer
-// CHECK: [[BASE_PTRS_GEP6_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]**
-// CHECK: store [10 x i{{[0-9]+}}]* [[B:%.+]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
-// CHECK: [[PTRS_GEP6_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]**
-// CHECK: store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]],
-
-// CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]])
-
-// TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]])
-// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}},
-// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*,
-// TCHECK-NOT: alloca i{{[0-9]+}},
-// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}],
-// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]],
-// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]],
-// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}*
-// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]],
+
+
 // firstprivate(a)
-// TCHECK-NOT: store i{{[0-9]+}} %
 // firstprivate(b)
-// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8*
-// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8*
-// TCHECK: call void
@llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) -// TCHECK: ret void #endif +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK-64-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], 
i32* [[Y]], align 4 +// CHECK-64-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-64-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-64: omp_offload.failed: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-64: omp_offload.cont: +// CHECK-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// 
CHECK-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK-64-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK-64-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK-64-NEXT: [[TMP74:%.*]] = 
getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK-64-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK-64-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK-64-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK-64-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK-64-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK-64-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK-64-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
[[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK-64-NEXT: br i1 [[TMP115]], label 
[[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK-64: omp_offload.failed8: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK-64: omp_offload.cont9: +// CHECK-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK-64: omp_offload.failed14: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK-64: omp_offload.cont15: +// CHECK-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK-64-NEXT: ret i32 [[TMP144]] +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK-64-SAME: 
(i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast float* 
[[TMP2]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK-64-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK-64-NEXT: 
[[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[ADD6:%.*]] 
= add nsw i32 [[TMP8]], [[CALL5]] +// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: ret i32 [[TMP9]] +// CHECK-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK-64-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// 
CHECK-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64* +// CHECK-64-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK-64-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-64-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK-64-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK-64-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK-64-NEXT: [[TMP41:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8 +// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-64: omp_offload.failed: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-64: omp_offload.cont: +// CHECK-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// 
CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK-64-NEXT: ret i32 [[ADD4]] +// CHECK-64-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// 
CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-64-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK-64-NEXT: [[TMP34:%.*]] = call i32 
@__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-64: omp_offload.failed: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-64: omp_offload.cont: +// CHECK-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: ret i32 [[TMP36]] +// CHECK-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// 
CHECK-64-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-64: omp_offload.failed: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-64: omp_offload.cont: +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* 
[[A]], align 4 +// CHECK-64-NEXT: ret i32 [[TMP29]] +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK-64-NEXT: 
ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define 
{{[^@]+}}@.omp_offloading.requires_reg +// CHECK-64-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-64-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK-32-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// CHECK-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// CHECK-32-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], 
align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32* +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32** +// CHECK-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP20]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32* +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8* null, i8** [[TMP25]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 3, i32* [[TMP29]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP34]], align 4 +// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP35]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP36]], align 8 +// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP37]], align 8 +// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4 +// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4 +// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP40]], align 4 +// CHECK-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16* +// CHECK-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2 +// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4 +// CHECK-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64 +// CHECK-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8 +// CHECK-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 +// CHECK-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false) +// 
CHECK-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* +// CHECK-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4 +// CHECK-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32* +// CHECK-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4 +// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP55]], align 4 +// CHECK-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4 +// CHECK-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4 +// CHECK-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP60]], align 4 +// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4 +// CHECK-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32* +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4 +// CHECK-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2 +// CHECK-32-NEXT: store i8* null, i8** [[TMP65]], align 4 +// CHECK-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4 +// CHECK-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4 +// CHECK-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4 +// CHECK-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3 +// CHECK-32-NEXT: store i8* null, i8** [[TMP71]], align 4 +// CHECK-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4 +// CHECK-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4 +// CHECK-32-NEXT: [[TMP76:%.*]] = 
getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4 +// CHECK-32-NEXT: store i8* null, i8** [[TMP76]], align 4 +// CHECK-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5 +// CHECK-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32* +// CHECK-32-NEXT: store i32 5, i32* [[TMP78]], align 4 +// CHECK-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5 +// CHECK-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32* +// CHECK-32-NEXT: store i32 5, i32* [[TMP80]], align 4 +// CHECK-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5 +// CHECK-32-NEXT: store i8* null, i8** [[TMP81]], align 4 +// CHECK-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6 +// CHECK-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32* +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4 +// CHECK-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6 +// CHECK-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32* +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4 +// CHECK-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6 +// CHECK-32-NEXT: store i8* null, i8** [[TMP86]], align 4 +// CHECK-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7 +// CHECK-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4 +// CHECK-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7 +// CHECK-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4 +// CHECK-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4 +// CHECK-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7 +// CHECK-32-NEXT: store i8* null, i8** [[TMP92]], align 4 +// CHECK-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// CHECK-32-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK-32: omp_offload.failed6: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], 
double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK-32: omp_offload.cont7: +// CHECK-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK-32: omp_offload.failed12: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK-32: omp_offload.cont13: +// CHECK-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK-32-NEXT: ret i32 [[TMP144]] +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) 
[[D:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// 
CHECK-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK-32-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK-32-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* 
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: ret i32 [[TMP9]] +// CHECK-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK-32-NEXT: 
[[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK-32-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK-32-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK-32-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// 
CHECK-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK-32-NEXT: ret i32 [[ADD3]] +// CHECK-32-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AAA:%.*]] = alloca 
i8, align 1 +// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// 
CHECK-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP36:%.*]] = load i32, i32* 
[[A]], align 4 +// CHECK-32-NEXT: ret i32 [[TMP36]] +// CHECK-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: ret i32 [[TMP29]] +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: 
[[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK-32-NEXT: 
[[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-32-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-32-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[D:%.*]] = alloca 
[[STRUCT_TT:%.*]], align 8 +// CHECK0-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK0-64-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK0-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK0-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK0-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK0-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK0-64-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK0-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK0-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK0-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK0-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 
0, i32 0 +// CHECK0-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK0-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK0-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK0-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK0-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK0-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK0-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK0-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK0-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK0-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK0-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK0-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK0-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK0-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK0-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK0-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK0-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK0-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK0-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK0-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK0-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK0-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK0-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK0-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK0-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK0-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK0-64-NEXT: 
[[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK0-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK0-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK0-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK0-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK0-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK0-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK0-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK0-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK0-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK0-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK0-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK0-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK0-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK0-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK0-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK0-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK0-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK0-64-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK0-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK0-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK0-64-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK0-64-NEXT: [[TMP81:%.*]] = 
getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK0-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK0-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK0-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK0-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK0-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK0-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK0-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK0-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK0-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK0-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK0-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK0-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK0-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK0-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK0-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK0-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK0-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK0-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK0-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK0-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK0-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK0-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK0-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK0-64-NEXT: 
[[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK0-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK0-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK0-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK0-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK0-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK0-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK0-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK0-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK0-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK0-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK0-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK0-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK0-64: omp_offload.failed8: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK0-64: omp_offload.cont9: +// CHECK0-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK0-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK0-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 
0, i32 0 +// CHECK0-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK0-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK0-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK0-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK0-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK0-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK0-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK0-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK0-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK0-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK0-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK0-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK0-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK0-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK0-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK0-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP137]], 
align 8 +// CHECK0-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK0-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK0-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK0-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK0-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK0-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK0-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK0-64: omp_offload.failed14: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK0-64: omp_offload.cont15: +// CHECK0-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK0-64-NEXT: ret i32 [[TMP144]] +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK0-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK0-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK0-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK0-64-NEXT: 
[[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK0-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK0-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK0-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK0-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK0-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// CHECK0-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK0-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK0-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK0-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// 
CHECK0-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK0-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK0-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK0-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK0-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK0-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK0-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK0-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK0-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK0-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK0-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK0-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK0-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK0-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK0-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK0-64-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK0-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK0-64-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK0-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK0-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// 
CHECK0-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK0-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK0-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK0-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK0-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK0-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK0-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK0-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK0-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK0-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK0-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: ret i32 [[TMP9]] +// CHECK0-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK0-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: 
[[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK0-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK0-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK0-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK0-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK0-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK0-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK0-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK0-64-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP21:%.*]] = 
bitcast i8** [[TMP20]] to i64* +// CHECK0-64-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK0-64-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK0-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK0-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK0-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK0-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK0-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK0-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK0-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK0-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK0-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* [[TMP38]], 
i64** [[TMP43]], align 8 +// CHECK0-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK0-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK0-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK0-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK0-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK0-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK0-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK0-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK0-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK0-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK0-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK0-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK0-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK0-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK0-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK0-64-NEXT: ret i32 [[ADD4]] +// CHECK0-64-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: 
[[AAA:%.*]] = alloca i8, align 1 +// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK0-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK0-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK0-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK0-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK0-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK0-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK0-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK0-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK0-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK0-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK0-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 
[[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: ret i32 [[TMP36]] +// CHECK0-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK0-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK0-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK0-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK0-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK0-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP12]], i8*** 
[[TMP16]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK0-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK0-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK0-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK0-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: ret i32 [[TMP29]] +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK0-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, 
align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK0-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK0-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK0-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK0-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK0-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK0-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK0-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK0-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK0-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK0-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK0-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[AAA_ADDR:%.*]] = alloca 
i64, align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK0-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK0-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK0-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK0-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK0-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK0-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK0-64-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK0-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// 
CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-64-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK1-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK1-64-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK1-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK1-64-NEXT: 
[[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK1-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK1-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK1-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK1-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK1-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK1-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK1-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK1-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK1-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK1-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK1-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
[[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK1-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK1-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK1-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK1-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK1-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK1-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK1-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK1-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK1-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK1-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK1-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK1-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK1-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK1-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK1-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK1-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK1-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK1-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
[[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK1-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK1-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK1-64-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK1-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK1-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK1-64-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK1-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK1-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK1-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK1-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK1-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK1-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK1-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK1-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK1-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK1-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK1-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK1-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK1-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK1-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK1-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK1-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK1-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK1-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK1-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK1-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK1-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK1-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK1-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK1-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK1-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK1-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK1-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK1-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK1-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK1-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK1-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK1-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK1-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK1-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK1-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK1-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK1-64: omp_offload.failed8: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* 
[[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK1-64: omp_offload.cont9: +// CHECK1-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK1-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK1-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK1-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK1-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK1-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK1-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK1-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK1-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK1-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK1-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK1-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK1-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK1-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK1-64-NEXT: 
[[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK1-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK1-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK1-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK1-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK1-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK1-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK1-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK1-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK1-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK1-64: omp_offload.failed14: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK1-64: omp_offload.cont15: +// CHECK1-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK1-64-NEXT: ret i32 [[TMP144]] +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x 
[10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK1-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK1-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK1-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) 
+// CHECK1-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK1-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK1-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK1-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK1-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK1-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK1-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK1-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK1-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK1-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK1-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK1-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK1-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK1-64-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK1-64-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK1-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-64-NEXT: store double* [[PTR]], double** 
[[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK1-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK1-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK1-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK1-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK1-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK1-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK1-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK1-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK1-64-NEXT: 
store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: ret i32 [[TMP9]] +// CHECK1-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK1-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK1-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK1-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK1-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK1-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK1-64-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-64-NEXT: 
store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64* +// CHECK1-64-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK1-64-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK1-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK1-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK1-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK1-64-NEXT: 
[[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK1-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK1-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8 +// CHECK1-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK1-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK1-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK1-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK1-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK1-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK1-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK1-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK1-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK1-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK1-64-NEXT: 
[[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK1-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK1-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK1-64-NEXT: ret i32 [[ADD4]] +// CHECK1-64-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK1-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to 
[10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 
0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK1-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: ret i32 [[TMP36]] +// CHECK1-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK1-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// 
CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK1-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK1-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: ret i32 [[TMP29]] +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK1-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK1-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK1-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK1-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK1-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK1-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK1-64-NEXT: 
[[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK1-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK1-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-64-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-64-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-32-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK2-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// CHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 
0, i32 1 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// CHECK2-32-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK2-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK2-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4 +// CHECK2-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4 +// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4 +// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP15]], align 4 +// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32** +// CHECK2-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4 +// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK2-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4 +// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP20]], align 4 +// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4 +// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP25]], align 4 +// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP28]], align 4 +// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 3, i32* [[TMP29]], align 4 +// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4 +// CHECK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4 +// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4 +// CHECK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4 +// CHECK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP34]], align 4 +// CHECK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP35]], align 4 +// CHECK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP36]], align 8 +// CHECK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP37]], align 8 +// CHECK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4 +// CHECK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4 +// CHECK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP40]], align 4 +// CHECK2-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-32: omp_offload.failed: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2-32: omp_offload.cont: +// CHECK2-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16* +// CHECK2-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2 +// CHECK2-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4 +// 
CHECK2-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64 +// CHECK2-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK2-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8 +// CHECK2-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 +// CHECK2-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false) +// CHECK2-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4 +// CHECK2-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4 +// CHECK2-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP55]], align 4 +// CHECK2-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4 +// CHECK2-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4 +// CHECK2-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP60]], align 4 +// CHECK2-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4 +// CHECK2-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4 +// CHECK2-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP65]], align 4 +// CHECK2-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK2-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4 +// CHECK2-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK2-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4 +// CHECK2-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK2-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4 +// CHECK2-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP71]], align 4 +// CHECK2-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 
0, i32 4 +// CHECK2-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4 +// CHECK2-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4 +// CHECK2-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4 +// CHECK2-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP76]], align 4 +// CHECK2-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5 +// CHECK2-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32* +// CHECK2-32-NEXT: store i32 5, i32* [[TMP78]], align 4 +// CHECK2-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5 +// CHECK2-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32* +// CHECK2-32-NEXT: store i32 5, i32* [[TMP80]], align 4 +// CHECK2-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP81]], align 4 +// CHECK2-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6 +// CHECK2-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4 +// CHECK2-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6 +// CHECK2-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4 +// CHECK2-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP86]], align 4 +// CHECK2-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7 +// CHECK2-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK2-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4 +// CHECK2-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7 +// CHECK2-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK2-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4 +// CHECK2-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK2-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4 +// CHECK2-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP92]], align 4 +// CHECK2-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK2-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK2-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK2-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK2-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// 
CHECK2-32-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK2-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK2-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK2-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK2-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK2-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK2-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK2-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK2-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK2-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK2-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK2-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK2-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK2-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK2-32-NEXT: [[TMP114:%.*]] = call i32 
@__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK2-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK2-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK2-32: omp_offload.failed6: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK2-32: omp_offload.cont7: +// CHECK2-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK2-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK2-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK2-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK2-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK2-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK2-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK2-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK2-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK2-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK2-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK2-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK2-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK2-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK2-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK2-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK2-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK2-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK2-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK2-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK2-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK2-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK2-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK2-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK2-32: omp_offload.failed12: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK2-32: omp_offload.cont13: +// CHECK2-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK2-32-NEXT: ret i32 [[TMP144]] +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: store i32* [[P]], 
i32** [[P_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK2-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK2-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK2-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK2-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[VLA6:%.*]] = alloca 
float, i32 [[TMP1]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK2-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK2-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK2-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// CHECK2-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK2-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK2-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK2-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK2-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK2-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK2-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK2-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK2-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK2-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK2-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK2-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK2-32-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK2-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK2-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK2-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK2-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK2-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK2-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK2-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP6]], [[CALL3]] +// CHECK2-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK2-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK2-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: ret i32 [[TMP9]] +// CHECK2-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK2-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK2-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK2-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK2-32-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4 +// CHECK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK2-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK2-32-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK2-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK2-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK2-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK2-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK2-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK2-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK2-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK2-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK2-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK2-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK2-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK2-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK2-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK2-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK2-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK2-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK2-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-32: omp_offload.failed: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2-32: omp_offload.cont: 
+// CHECK2-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK2-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK2-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK2-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK2-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK2-32-NEXT: ret i32 [[ADD3]] +// CHECK2-32-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK2-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK2-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 
x i32]** +// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* 
[[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK2-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK2-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-32: omp_offload.failed: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2-32: omp_offload.cont: +// CHECK2-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: ret i32 [[TMP36]] +// CHECK2-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4 +// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK2-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-32-NEXT: br i1 [[TMP28]], label 
[[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-32: omp_offload.failed: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2-32: omp_offload.cont: +// CHECK2-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: ret i32 [[TMP29]] +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK2-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK2-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK2-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK2-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK2-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK2-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK2-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, 
i16* [[VLA3]], i32 [[TMP12]] +// CHECK2-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK2-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK2-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK2-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK2-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// 
CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK2-32-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK2-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK3-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK3-32-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-32-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// CHECK3-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK3-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// CHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// 
CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// CHECK3-32-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK3-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK3-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4 +// CHECK3-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4 +// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4 +// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP15]], align 4 +// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32** +// CHECK3-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4 +// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK3-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4 +// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP20]], align 4 +// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4 +// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4 +// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP25]], align 4 +// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP28]], align 4 +// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 3, i32* [[TMP29]], align 4 +// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-32-NEXT: 
store i8** [[TMP26]], i8*** [[TMP30]], align 4 +// CHECK3-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4 +// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4 +// CHECK3-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4 +// CHECK3-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP34]], align 4 +// CHECK3-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP35]], align 4 +// CHECK3-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP36]], align 8 +// CHECK3-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP37]], align 8 +// CHECK3-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4 +// CHECK3-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4 +// CHECK3-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP40]], align 4 +// CHECK3-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK3-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-32: omp_offload.failed: +// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3-32: omp_offload.cont: +// CHECK3-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16* +// CHECK3-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2 +// CHECK3-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4 +// CHECK3-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64 +// CHECK3-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]] +// 
CHECK3-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8 +// CHECK3-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 +// CHECK3-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false) +// CHECK3-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4 +// CHECK3-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4 +// CHECK3-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP55]], align 4 +// CHECK3-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4 +// CHECK3-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4 +// CHECK3-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP60]], align 4 +// CHECK3-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4 +// CHECK3-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4 +// CHECK3-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP65]], align 4 +// CHECK3-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3 +// CHECK3-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK3-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4 +// CHECK3-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3 +// CHECK3-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK3-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4 +// CHECK3-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK3-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4 +// CHECK3-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP71]], align 4 +// CHECK3-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4 +// CHECK3-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK3-32-NEXT: store [5 x 
[10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4 +// CHECK3-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4 +// CHECK3-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4 +// CHECK3-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP76]], align 4 +// CHECK3-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5 +// CHECK3-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32* +// CHECK3-32-NEXT: store i32 5, i32* [[TMP78]], align 4 +// CHECK3-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5 +// CHECK3-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32* +// CHECK3-32-NEXT: store i32 5, i32* [[TMP80]], align 4 +// CHECK3-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP81]], align 4 +// CHECK3-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6 +// CHECK3-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4 +// CHECK3-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6 +// CHECK3-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4 +// CHECK3-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP86]], align 4 +// CHECK3-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7 +// CHECK3-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK3-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4 +// CHECK3-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7 +// CHECK3-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK3-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4 +// CHECK3-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK3-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4 +// CHECK3-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP92]], align 4 +// CHECK3-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK3-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK3-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK3-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK3-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK3-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x 
i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK3-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK3-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK3-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK3-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK3-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK3-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK3-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK3-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK3-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK3-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK3-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK3-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK3-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK3-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK3-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK3-32: omp_offload.failed6: +// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK3-32: omp_offload.cont7: +// CHECK3-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK3-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK3-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK3-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK3-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK3-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK3-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK3-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK3-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK3-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK3-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK3-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK3-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* getelementptr 
inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK3-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK3-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK3-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK3-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK3-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK3-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK3-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK3-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK3-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK3-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK3-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK3-32: omp_offload.failed12: +// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK3-32: omp_offload.cont13: +// CHECK3-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK3-32-NEXT: ret i32 [[TMP144]] +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: 
define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK3-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK3-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK3-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK3-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK3-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK3-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK3-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-32-NEXT: [[TMP11:%.*]] = mul nuw 
i32 [[TMP1]], 4 +// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK3-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK3-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK3-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// CHECK3-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK3-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK3-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK3-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK3-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK3-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK3-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK3-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK3-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK3-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK3-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK3-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK3-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK3-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK3-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK3-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK3-32-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK3-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// 
CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK3-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK3-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK3-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK3-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK3-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK3-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK3-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK3-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[CALL5:%.*]] = call 
noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK3-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK3-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: ret i32 [[TMP9]] +// CHECK3-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK3-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK3-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK3-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK3-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK3-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK3-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK3-32-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 
x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4 +// CHECK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK3-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK3-32-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK3-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK3-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK3-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK3-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK3-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK3-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK3-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK3-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK3-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK3-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK3-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK3-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK3-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK3-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK3-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK3-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK3-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK3-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK3-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK3-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK3-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-32: omp_offload.failed: +// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3-32: omp_offload.cont: +// CHECK3-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK3-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr 
inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK3-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK3-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK3-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK3-32-NEXT: ret i32 [[ADD3]] +// CHECK3-32-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-32-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK3-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// 
CHECK3-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK3-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK3-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK3-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK3-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-32: omp_offload.failed: +// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3-32: omp_offload.cont: +// CHECK3-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: ret i32 [[TMP36]] +// CHECK3-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK3-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 
+// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4 +// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK3-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-32: omp_offload.failed: +// CHECK3-32-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3-32: omp_offload.cont: +// CHECK3-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: ret i32 [[TMP29]] +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK3-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK3-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK3-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK3-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK3-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK3-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK3-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK3-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 
+// CHECK3-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK3-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK3-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK3-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 
1 +// CHECK3-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-32-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-32-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// TCHECK-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// TCHECK-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// TCHECK-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// TCHECK-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// TCHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// TCHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// TCHECK-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// TCHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load 
i64, i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// TCHECK-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// TCHECK-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// TCHECK-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// TCHECK-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// TCHECK-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// TCHECK-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// TCHECK-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// TCHECK-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// TCHECK-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// TCHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// TCHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// TCHECK-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// TCHECK-64-NEXT: store 
double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// TCHECK-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// TCHECK-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// TCHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK-64-NEXT: store i64 1, i64* [[X]], align 8 +// TCHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK-64-NEXT: store i8 1, i8* [[Y]], align 8 +// TCHECK-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// TCHECK-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// TCHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// TCHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK-64-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// TCHECK-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +// TCHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK-64-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8 +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// TCHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// TCHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK-64-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// TCHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// TCHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// TCHECK-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// TCHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// TCHECK-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// TCHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// 
TCHECK-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// TCHECK-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// TCHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// TCHECK-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// TCHECK-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// TCHECK-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// TCHECK-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// TCHECK-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// TCHECK1-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 
dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// TCHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// TCHECK1-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// TCHECK1-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// TCHECK1-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// TCHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// TCHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// TCHECK1-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// TCHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// TCHECK1-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// TCHECK1-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK1-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// TCHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK1-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// TCHECK1-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK1-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 
false) +// TCHECK1-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK1-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// TCHECK1-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK1-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// TCHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK1-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// TCHECK1-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK1-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// TCHECK1-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK1-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// TCHECK1-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK1-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// TCHECK1-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK1-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK1-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK1-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// TCHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// TCHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK1-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// TCHECK1-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// TCHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK1-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// TCHECK1-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// TCHECK1-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// TCHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK1-64-NEXT: store i64 1, i64* [[X]], align 8 +// TCHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK1-64-NEXT: store i8 1, i8* [[Y]], align 8 +// TCHECK1-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// TCHECK1-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// TCHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 
+// TCHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK1-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK1-64-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// TCHECK1-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK1-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +// TCHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK1-64-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8 +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK1-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// TCHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// TCHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// TCHECK1-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// TCHECK1-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// TCHECK1-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// TCHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, 
align 8 +// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// TCHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK1-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK1-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// TCHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK1-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// TCHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK1-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// TCHECK1-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK1-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// TCHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK1-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// TCHECK1-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK1-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// TCHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK1-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// TCHECK1-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// TCHECK1-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// TCHECK1-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// TCHECK1-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// TCHECK1-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// 
TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK1-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK2-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// TCHECK2-32-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// TCHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// TCHECK2-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// TCHECK2-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// TCHECK2-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// 
TCHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK2-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// TCHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK2-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// TCHECK2-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// TCHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// TCHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// TCHECK2-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// TCHECK2-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK2-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// TCHECK2-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// TCHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK2-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// TCHECK2-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK2-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// TCHECK2-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK2-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// TCHECK2-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK2-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// TCHECK2-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK2-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// TCHECK2-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK2-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// TCHECK2-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK2-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// TCHECK2-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK2-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// TCHECK2-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK2-32-NEXT: [[CONV10:%.*]] = sext i16 
[[TMP23]] to i32 +// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK2-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK2-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// TCHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK2-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// TCHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK2-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// TCHECK2-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// TCHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK2-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// TCHECK2-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// TCHECK2-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// TCHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK2-32-NEXT: store i64 1, i64* [[X]], align 4 +// TCHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK2-32-NEXT: store i8 1, i8* [[Y]], align 4 +// TCHECK2-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK2-32-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// TCHECK2-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// TCHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// TCHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK2-32-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0 +// TCHECK2-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// TCHECK2-32-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK2-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0 +// TCHECK2-32-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4 +// TCHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK2-32-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4 +// TCHECK2-32-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// 
TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// TCHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// TCHECK2-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// TCHECK2-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// TCHECK2-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK2-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK2-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// TCHECK2-32-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// TCHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK2-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK2-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// TCHECK2-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK2-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// TCHECK2-32-NEXT: store i32 [[TMP1]], i32* 
[[__VLA_EXPR0]], align 4 +// TCHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// TCHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK2-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// TCHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK2-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// TCHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK2-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// TCHECK2-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK2-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// TCHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK2-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// TCHECK2-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// TCHECK2-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// TCHECK2-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// TCHECK2-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// TCHECK2-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK2-32-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK2-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK2-32-NEXT: ret void +// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK3-32-NEXT: entry: +// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, 
align 4 +// TCHECK3-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// TCHECK3-32-NEXT: ret void +// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// TCHECK3-32-NEXT: entry: +// TCHECK3-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// TCHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// TCHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// TCHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// TCHECK3-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// TCHECK3-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// TCHECK3-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// TCHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK3-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// TCHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK3-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// TCHECK3-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// TCHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// TCHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// TCHECK3-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 
4 [[TMP9]], i32 40, i1 false) +// TCHECK3-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK3-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// TCHECK3-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// TCHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK3-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// TCHECK3-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK3-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// TCHECK3-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK3-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// TCHECK3-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK3-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// TCHECK3-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK3-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// TCHECK3-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK3-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// TCHECK3-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK3-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// TCHECK3-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK3-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// TCHECK3-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK3-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK3-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK3-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// TCHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK3-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// TCHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK3-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// TCHECK3-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// TCHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK3-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// TCHECK3-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// TCHECK3-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// TCHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK3-32-NEXT: store i64 1, i64* [[X]], align 4 +// TCHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK3-32-NEXT: store i8 1, i8* [[Y]], align 4 +// 
TCHECK3-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK3-32-NEXT: ret void +// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK3-32-NEXT: entry: +// TCHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// TCHECK3-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// TCHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// TCHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK3-32-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0 +// TCHECK3-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// TCHECK3-32-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK3-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0 +// TCHECK3-32-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4 +// TCHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK3-32-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4 +// TCHECK3-32-NEXT: ret void +// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK3-32-NEXT: entry: +// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// TCHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// TCHECK3-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// TCHECK3-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// TCHECK3-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[ARRAYIDX]], align 4 +// TCHECK3-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK3-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// TCHECK3-32-NEXT: ret void +// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK3-32-NEXT: entry: +// TCHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// TCHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK3-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK3-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// TCHECK3-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK3-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// TCHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// TCHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK3-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// TCHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK3-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// TCHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK3-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// TCHECK3-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK3-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// TCHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK3-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// TCHECK3-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// TCHECK3-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// TCHECK3-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// TCHECK3-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// 
TCHECK3-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK3-32-NEXT: ret void +// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK3-32-NEXT: entry: +// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK3-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK3-32-NEXT: ret void +// CHECK0-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK0-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK0-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK0-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x 
i8*], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK0-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK0-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK0-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK0-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK0-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK0-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK0-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK0-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK0-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK0-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK0-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK0-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK0-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 
2 +// CHECK0-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK0-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK0-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK0-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK0-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK0-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK0-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK0-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK0-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK0-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK0-NEXT: [[TMP42:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK0-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK0-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK0-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK0-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK0-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK0-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK0-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK0-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK0-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK0-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK0-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK0-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK0-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK0-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK0-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK0-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK0-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK0-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK0-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK0-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK0-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK0-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK0-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK0-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x 
i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK0-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK0-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK0-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK0-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK0-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK0-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK0-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK0-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK0-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK0-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK0-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK0-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK0-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK0-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK0-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK0-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK0-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK0-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK0-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK0-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK0-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK0-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK0-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK0-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK0-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK0-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK0-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK0-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK0-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK0-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK0-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK0-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK0-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK0-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK0-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK0-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK0-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK0-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK0-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK0-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK0-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK0-NEXT: [[TMP92:%.*]] = getelementptr 
inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK0-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK0-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK0-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK0-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK0-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK0-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK0-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK0-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK0-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK0-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK0-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK0-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK0-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK0-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK0-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK0-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK0-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK0-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK0-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK0-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK0-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK0-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// 
CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK0-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK0-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK0-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK0-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK0-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK0: omp_offload.failed8: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK0: omp_offload.cont9: +// CHECK0-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK0-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK0-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK0-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK0-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK0-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK0-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK0-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK0-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK0-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK0-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK0-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK0-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK0-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK0-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK0-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK0-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK0-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK0-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK0-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK0-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK0-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK0-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK0-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK0-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK0-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK0-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK0: omp_offload.failed14: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK0: omp_offload.cont15: +// CHECK0-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK0-NEXT: ret i32 [[TMP144]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK0-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef 
[[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK0-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK0-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK0-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK0-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK0-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK0-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK0-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK0-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK0-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK0-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK0-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK0-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK0-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK0-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK0-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK0-NEXT: [[TMP9:%.*]] = bitcast 
[10 x float]* [[TMP0]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK0-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK0-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK0-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK0-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// CHECK0-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK0-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK0-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK0-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK0-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK0-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK0-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK0-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK0-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK0-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK0-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK0-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK0-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK0-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK0-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK0-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK0-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK0-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK0-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK0-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK0-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK0-NEXT: [[TMP25:%.*]] = load i8*, i8** 
[[SAVED_STACK]], align 8 +// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK0-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK0-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK0-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK0-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK0-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK0-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK0-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK0-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK0-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK0-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK0-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK0-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK0-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK0-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK0-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK0-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK0-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK0-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK0-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: ret i32 [[TMP9]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK0-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK0-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK0-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK0-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK0-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK0-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK0-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK0-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK0-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// 
CHECK0-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK0-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64* +// CHECK0-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK0-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK0-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK0-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK0-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK0-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK0-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK0-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK0-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK0-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK0-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK0-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK0-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK0-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP39]], align 4 +// 
CHECK0-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK0-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK0-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK0-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8 +// CHECK0-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK0-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK0-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK0-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK0-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK0-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK0-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK0-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK0-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK0-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, 
i16* [[ARRAYIDX]], i64 1 +// CHECK0-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK0-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK0-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK0-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK0-NEXT: ret i32 [[ADD4]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK0-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK0-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK0-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK0-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK0-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK0-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, 
i32 2 +// CHECK0-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK0-NEXT: [[TMP34:%.*]] = call i32 
@__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK0-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: ret i32 [[TMP36]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK0-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK0-NEXT: [[TMP15:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8 +// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK0-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK0-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: ret i32 [[TMP29]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 
+// CHECK0-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK0-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK0-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK0-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK0-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK0-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK0-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK0-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK0-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK0-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK0-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK0-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK0-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK0-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK0-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) 
[[B:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK0-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK0-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK0-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK0-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK0-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK0-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK0-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK0-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK0-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK0-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK0-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK0-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK0-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK1-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK1-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* 
[[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK1-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK1-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK1-NEXT: 
[[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK1-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK1-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK1-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK1-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK1-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK1-NEXT: store 
i64 5, i64* [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK1-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK1-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK1-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK1-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK1-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK1-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK1-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 
+// CHECK1-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK1-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK1-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK1-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK1-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK1: omp_offload.failed8: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK1: omp_offload.cont9: +// CHECK1-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK1-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 
+// CHECK1-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK1-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK1-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* 
[[TMP138]], align 8 +// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK1-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK1-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK1-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK1: omp_offload.failed14: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK1: omp_offload.cont15: +// CHECK1-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK1-NEXT: ret i32 [[TMP144]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK1-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK1-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK1-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK1-NEXT: 
[[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] 
to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK1-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK1-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK1-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK1-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK1-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: 
[[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: ret i32 [[TMP9]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK1-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 
8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK1-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK1-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK1-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64* +// CHECK1-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK1-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: 
[[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP47]], align 8 +// 
CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK1-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK1-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK1-NEXT: ret i32 [[ADD4]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// 
CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: ret i32 [[TMP36]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// 
CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// 
CHECK1-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: ret i32 [[TMP29]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// 
CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK1-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK2-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca 
[2 x i8*], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK2-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// CHECK2-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32* +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32** +// CHECK2-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK2-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 
+// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32* +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-NEXT: store i8* null, i8** [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 3, i32* [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP34]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP37]], align 8 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* 
@[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16* +// CHECK2-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4 +// CHECK2-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64 +// CHECK2-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK2-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8 +// CHECK2-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 +// CHECK2-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false) +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* +// CHECK2-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32* +// CHECK2-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP55]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP60]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32* +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2 +// CHECK2-NEXT: store i8* null, i8** [[TMP65]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to 
float** +// CHECK2-NEXT: store float* [[VLA]], float** [[TMP67]], align 4 +// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK2-NEXT: store float* [[VLA]], float** [[TMP69]], align 4 +// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK2-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4 +// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3 +// CHECK2-NEXT: store i8* null, i8** [[TMP71]], align 4 +// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4 +// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4 +// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4 +// CHECK2-NEXT: store i8* null, i8** [[TMP76]], align 4 +// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32* +// CHECK2-NEXT: store i32 5, i32* [[TMP78]], align 4 +// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32* +// CHECK2-NEXT: store i32 5, i32* [[TMP80]], align 4 +// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5 +// CHECK2-NEXT: store i8* null, i8** [[TMP81]], align 4 +// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4 +// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4 +// CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6 +// CHECK2-NEXT: store i8* null, i8** [[TMP86]], align 4 +// CHECK2-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7 +// CHECK2-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK2-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4 +// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7 +// CHECK2-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK2-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4 +// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK2-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4 +// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7 +// CHECK2-NEXT: store i8* null, i8** [[TMP92]], 
align 4 +// CHECK2-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK2-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK2-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK2-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK2-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// CHECK2-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK2-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK2-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK2-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK2-NEXT: [[TMP112:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK2-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK2-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK2-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK2: omp_offload.failed6: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK2: omp_offload.cont7: +// CHECK2-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK2-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK2-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK2-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK2-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK2-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK2-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK2-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK2-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK2-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK2-NEXT: 
[[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK2-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK2-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK2-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK2-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK2-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK2-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK2-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK2-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK2-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK2: omp_offload.failed12: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK2: omp_offload.cont13: +// CHECK2-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK2-NEXT: ret i32 [[TMP144]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK2-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[P_ADDR:%.*]] = alloca 
i32*, align 4 +// CHECK2-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK2-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK2-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK2-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK2-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// 
CHECK2-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// CHECK2-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK2-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK2-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK2-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK2-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK2-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK2-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 
4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK2-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK2-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK2-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK2-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: ret i32 [[TMP9]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK2-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK2-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK2-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK2-NEXT: store i32 
[[TMP5]], i32* [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK2-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK2-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK2-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK2-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK2-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK2-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK2-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK2-NEXT: 
[[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK2-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK2-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK2-NEXT: ret i32 
[[ADD3]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x 
i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK2-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) 
#[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: ret i32 [[TMP36]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], 
align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: ret i32 [[TMP29]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK2-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: 
[[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK2-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK2-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = 
bitcast [10 x i32]* [[B1]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK2-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK2-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK2-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK3-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK3-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK3-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// CHECK3-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK3-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32* +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP15]], align 4 +// CHECK3-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32** +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32* +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store i8* null, i8** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, i32* [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, i32* [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store i8** null, i8*** [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store i8** null, i8*** [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, i64* 
[[TMP36]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, i64* [[TMP37]], align 8 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, i32* [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK3-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16* +// CHECK3-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4 +// CHECK3-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64 +// CHECK3-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK3-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8 +// CHECK3-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 +// CHECK3-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false) +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* +// CHECK3-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32* +// CHECK3-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4 +// 
CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32* +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4 +// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2 +// CHECK3-NEXT: store i8* null, i8** [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK3-NEXT: store float* [[VLA]], float** [[TMP67]], align 4 +// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK3-NEXT: store float* [[VLA]], float** [[TMP69]], align 4 +// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK3-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4 +// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3 +// CHECK3-NEXT: store i8* null, i8** [[TMP71]], align 4 +// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4 +// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4 +// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4 +// CHECK3-NEXT: store i8* null, i8** [[TMP76]], align 4 +// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32* +// CHECK3-NEXT: store i32 5, i32* [[TMP78]], align 4 +// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32* +// CHECK3-NEXT: store i32 5, i32* [[TMP80]], align 4 +// CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5 +// CHECK3-NEXT: store i8* null, i8** [[TMP81]], align 4 +// CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32* +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4 +// CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32* +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4 +// CHECK3-NEXT: 
[[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6 +// CHECK3-NEXT: store i8* null, i8** [[TMP86]], align 4 +// CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK3-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4 +// CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK3-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4 +// CHECK3-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK3-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4 +// CHECK3-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7 +// CHECK3-NEXT: store i8* null, i8** [[TMP92]], align 4 +// CHECK3-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK3-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK3-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// CHECK3-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK3-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK3-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK3-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK3-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK3-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK3-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK3-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK3-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK3-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* 
[[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK3-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK3-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK3-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK3-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK3-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK3-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK3-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK3-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK3-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK3-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK3: omp_offload.failed6: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK3: omp_offload.cont7: +// CHECK3-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK3-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK3-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK3-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK3-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK3-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK3-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK3-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x 
i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK3-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK3-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK3-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK3-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK3-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK3-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK3-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK3-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK3-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK3-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK3-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK3-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK3-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK3-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK3-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK3-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK3-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK3-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK3: omp_offload.failed12: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK3: omp_offload.cont13: +// CHECK3-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK3-NEXT: ret i32 [[TMP144]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK3-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK3-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK3-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] 
to i16* +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK3-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// CHECK3-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK3-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK3-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK3-NEXT: store 
double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK3-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK3-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK3-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK3-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK3-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK3-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK3-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: ret i32 [[TMP9]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK3-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK3-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// 
CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK3-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK3-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK3-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK3-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK3-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK3-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK3-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// 
CHECK3-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP53:%.*]] = icmp ne i32 
[[TMP52]], 0 +// CHECK3-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK3-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK3-NEXT: ret i32 [[ADD3]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// 
CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, 
i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: ret i32 [[TMP36]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK3-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label 
[[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-NEXT: ret i32 [[TMP29]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK3-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK3-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV5]], i16* 
[[ARRAYIDX6]], align 2 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK3-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK3-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// 
CHECK3-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// SIMD-ONLY0-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY0-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY0-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY0-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// SIMD-ONLY0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// SIMD-ONLY0-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// SIMD-ONLY0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// SIMD-ONLY0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// SIMD-ONLY0-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY0-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2 +// SIMD-ONLY0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3 +// SIMD-ONLY0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1 +// SIMD-ONLY0-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2 +// SIMD-ONLY0-NEXT: store double 
1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3 +// SIMD-ONLY0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY0-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store i64 1, i64* [[X8]], align 8 +// SIMD-ONLY0-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY0-NEXT: store i8 1, i8* [[Y9]], align 8 +// SIMD-ONLY0-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0 +// SIMD-ONLY0-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0 +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 +// SIMD-ONLY0-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY0-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// SIMD-ONLY0-NEXT: ret i32 [[TMP14]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY0-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext 
[[TMP5]]) +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY0-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY0-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY0-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY0-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY0-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY0-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY0-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY0-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY0-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY0-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY0-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY0-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY0-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY0-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY01-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY01-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY01-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// 
SIMD-ONLY01-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// SIMD-ONLY01-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// SIMD-ONLY01-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY01-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY01-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// SIMD-ONLY01-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// SIMD-ONLY01-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// SIMD-ONLY01-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// SIMD-ONLY01-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// SIMD-ONLY01-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// SIMD-ONLY01-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY01-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY01-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3 +// SIMD-ONLY01-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY01-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1 +// SIMD-ONLY01-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3 +// SIMD-ONLY01-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY01-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: store i64 1, i64* [[X8]], align 8 +// SIMD-ONLY01-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY01-NEXT: store i8 1, i8* [[Y9]], align 8 +// SIMD-ONLY01-NEXT: [[X10:%.*]] = getelementptr inbounds 
[[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY01-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double +// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0 +// SIMD-ONLY01-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8 +// SIMD-ONLY01-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0 +// SIMD-ONLY01-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY01-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 +// SIMD-ONLY01-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY01-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// SIMD-ONLY01-NEXT: ret i32 [[TMP14]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY01-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY01-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY01-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY01-LABEL: 
define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY01-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY01-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY01-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY01-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY01-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY01-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY01-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY01-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY01-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY01-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY01-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY01-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY01-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY01-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY01-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY01-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[AAA:%.*]] = alloca 
i8, align 1 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY01-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY01-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY01-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY02-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY02-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY02-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY02-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY02-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = call i8* 
@llvm.stacksave() +// SIMD-ONLY02-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY02-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY02-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY02-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY02-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY02-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY02-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY02-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY02-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY02-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY02-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY02-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY02-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY02-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY02-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY02-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY02-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY02-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY02-NEXT: [[INC:%.*]] = fadd double 
[[TMP11]], 1.000000e+00 +// SIMD-ONLY02-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY02-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY02-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY02-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY02-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY02-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY02-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY02-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// 
SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY02-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY02-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY02-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY02-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY02-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: store double [[ADD2]], double* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY02-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY02-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY02-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY02-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY02-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY02-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY02-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY02-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY02-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY02-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY03-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY03-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY03-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY03-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY03-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY03-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY03-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY03-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY03-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY03-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 
4 +// SIMD-ONLY03-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY03-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY03-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY03-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY03-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY03-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY03-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY03-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY03-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY03-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY03-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY03-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY03-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY03-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY03-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// SIMD-ONLY03-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY03-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY03-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// 
SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY03-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY03-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY03-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY03-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY03-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY03-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY03-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY03-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY03-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY03-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: store double [[ADD2]], double* [[A]], align 4 +// 
SIMD-ONLY03-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY03-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY03-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY03-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY03-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY03-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY03-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY03-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY03-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY03-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY03-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// 
SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: ret i32 [[TMP2]] +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// TCHECK-NEXT: ret void +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// TCHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// TCHECK-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// TCHECK-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// TCHECK-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// TCHECK-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// TCHECK-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// TCHECK-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// TCHECK-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// TCHECK-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// TCHECK-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// TCHECK-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], 
align 8 +// TCHECK-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// TCHECK-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// TCHECK-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// TCHECK-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// TCHECK-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// TCHECK-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// TCHECK-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// TCHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// TCHECK-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// TCHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// TCHECK-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// TCHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// TCHECK-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// TCHECK-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// TCHECK-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// TCHECK-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// TCHECK-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// TCHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// TCHECK-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// TCHECK-NEXT: [[ARRAYIDX15:%.*]] = 
getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// TCHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// TCHECK-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK-NEXT: store i64 1, i64* [[X]], align 8 +// TCHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK-NEXT: store i8 1, i8* [[Y]], align 8 +// TCHECK-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK-NEXT: ret void +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// TCHECK-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// TCHECK-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// TCHECK-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// TCHECK-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0 +// TCHECK-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8 +// TCHECK-NEXT: ret void +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// TCHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// TCHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// TCHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// TCHECK-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK-NEXT: 
[[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// TCHECK-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// TCHECK-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// TCHECK-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// TCHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// TCHECK-NEXT: ret void +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// TCHECK-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// TCHECK-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// TCHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// TCHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// TCHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// TCHECK-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// TCHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// TCHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// TCHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK-NEXT: store double [[ADD]], double* [[A]], align 8 +// TCHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK-NEXT: store double [[INC]], double* [[A5]], align 8 +// TCHECK-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 
+// TCHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// TCHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// TCHECK-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// TCHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK-NEXT: ret void +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// TCHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK-NEXT: ret void +// +// +// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK1-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK1-NEXT: entry: +// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// TCHECK1-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// TCHECK1-NEXT: ret void +// +// +// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK1-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// TCHECK1-NEXT: entry: +// TCHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// TCHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// TCHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x 
double]]*, align 8 +// TCHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// TCHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// TCHECK1-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// TCHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// TCHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// TCHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// TCHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// TCHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// TCHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// TCHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// TCHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// TCHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// TCHECK1-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK1-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// TCHECK1-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK1-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// TCHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK1-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// TCHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK1-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// TCHECK1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// 
TCHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK1-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// TCHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// TCHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK1-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK1-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// TCHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// TCHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// TCHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// TCHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK1-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// TCHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// TCHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// TCHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK1-NEXT: store i64 1, i64* [[X]], align 8 +// TCHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK1-NEXT: store i8 1, i8* [[Y]], align 8 +// TCHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK1-NEXT: ret void +// +// +// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK1-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK1-NEXT: entry: +// TCHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// TCHECK1-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// TCHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// TCHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// TCHECK1-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* 
[[TMP3]], i64 0 +// TCHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK1-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8 +// TCHECK1-NEXT: ret void +// +// +// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK1-NEXT: entry: +// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK1-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// TCHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// TCHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// TCHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// TCHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// TCHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// TCHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// TCHECK1-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// TCHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: ret void +// +// +// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK1-NEXT: entry: +// TCHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// TCHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// TCHECK1-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK1-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK1-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// TCHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// TCHECK1-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK1-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// TCHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// TCHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 +// TCHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK1-NEXT: store double [[INC]], double* [[A5]], align 8 +// TCHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// TCHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// TCHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// TCHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// TCHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK1-NEXT: ret void +// +// +// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK1-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK1-NEXT: entry: +// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// TCHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 
+// TCHECK1-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: ret void +// +// +// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK2-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK2-NEXT: entry: +// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK2-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// TCHECK2-NEXT: ret void +// +// +// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK2-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// TCHECK2-NEXT: entry: +// TCHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// TCHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// TCHECK2-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// TCHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// TCHECK2-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// TCHECK2-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// TCHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// TCHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK2-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// TCHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK2-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// TCHECK2-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// TCHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// TCHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// TCHECK2-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// TCHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// 
TCHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK2-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// TCHECK2-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK2-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// TCHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// TCHECK2-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK2-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// TCHECK2-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK2-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// TCHECK2-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK2-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// TCHECK2-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK2-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// TCHECK2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// TCHECK2-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK2-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// TCHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// TCHECK2-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK2-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK2-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK2-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// TCHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// TCHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// TCHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// TCHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK2-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// TCHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// TCHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// TCHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK2-NEXT: store i64 1, 
i64* [[X]], align 4 +// TCHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK2-NEXT: store i8 1, i8* [[Y]], align 4 +// TCHECK2-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK2-NEXT: ret void +// +// +// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK2-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK2-NEXT: entry: +// TCHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// TCHECK2-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// TCHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// TCHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0 +// TCHECK2-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0 +// TCHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4 +// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK2-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4 +// TCHECK2-NEXT: ret void +// +// +// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK2-NEXT: entry: +// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// TCHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// TCHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// TCHECK2-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// TCHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// TCHECK2-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// 
TCHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK2-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: ret void +// +// +// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK2-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK2-NEXT: entry: +// TCHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// TCHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK2-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK2-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK2-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// TCHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK2-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// TCHECK2-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK2-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// TCHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// TCHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK2-NEXT: store double [[ADD]], double* [[A]], align 4 +// TCHECK2-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK2-NEXT: store double [[INC]], double* [[A4]], align 4 +// TCHECK2-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// TCHECK2-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// TCHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// TCHECK2-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// TCHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: call 
void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK2-NEXT: ret void +// +// +// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK2-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK2-NEXT: entry: +// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK2-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: ret void +// +// +// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK3-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK3-NEXT: entry: +// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK3-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// TCHECK3-NEXT: ret void +// +// +// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK3-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// TCHECK3-NEXT: entry: +// TCHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// TCHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// TCHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// TCHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// TCHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// TCHECK3-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// 
TCHECK3-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// TCHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// TCHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// TCHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// TCHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// TCHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// TCHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// TCHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// TCHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK3-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// TCHECK3-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK3-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// TCHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// TCHECK3-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK3-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// TCHECK3-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK3-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// TCHECK3-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK3-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// TCHECK3-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK3-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// TCHECK3-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// TCHECK3-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// TCHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK3-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// TCHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// TCHECK3-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], 
align 2 +// TCHECK3-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK3-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// TCHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// TCHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// TCHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// TCHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK3-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// TCHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// TCHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// TCHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK3-NEXT: store i64 1, i64* [[X]], align 4 +// TCHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK3-NEXT: store i8 1, i8* [[Y]], align 4 +// TCHECK3-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK3-NEXT: ret void +// +// +// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK3-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK3-NEXT: entry: +// TCHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// TCHECK3-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// TCHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// TCHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0 +// TCHECK3-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0 +// TCHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4 +// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK3-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4 +// TCHECK3-NEXT: ret void +// +// +// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK3-NEXT: entry: +// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca 
i32, align 4 +// TCHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// TCHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// TCHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// TCHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// TCHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// TCHECK3-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: ret void +// +// +// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK3-NEXT: entry: +// TCHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// TCHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// TCHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK3-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK3-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// TCHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// TCHECK3-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// TCHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// TCHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// 
TCHECK3-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK3-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// TCHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// TCHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 +// TCHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK3-NEXT: store double [[INC]], double* [[A4]], align 4 +// TCHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// TCHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// TCHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// TCHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// TCHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK3-NEXT: ret void +// +// +// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK3-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK3-NEXT: entry: +// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// TCHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK3-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// 
SIMD-ONLY1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// SIMD-ONLY1-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY1-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// SIMD-ONLY1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// SIMD-ONLY1-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// SIMD-ONLY1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// SIMD-ONLY1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// SIMD-ONLY1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// SIMD-ONLY1-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2 +// SIMD-ONLY1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3 +// SIMD-ONLY1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1 +// SIMD-ONLY1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2 +// SIMD-ONLY1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3 +// SIMD-ONLY1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY1-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store i64 1, i64* [[X8]], align 8 +// SIMD-ONLY1-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY1-NEXT: store i8 1, i8* [[Y9]], align 8 +// SIMD-ONLY1-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// 
SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY1-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double +// SIMD-ONLY1-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0 +// SIMD-ONLY1-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8 +// SIMD-ONLY1-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0 +// SIMD-ONLY1-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 +// SIMD-ONLY1-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY1-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY1-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// SIMD-ONLY1-NEXT: ret i32 [[TMP14]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY1-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY1-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY1-SAME: (%struct.S1* noundef nonnull align 8 
dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY1-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY1-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY1-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY1-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY1-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// 
SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY1-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY11-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY11-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY11-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// SIMD-ONLY11-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// SIMD-ONLY11-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY11-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY11-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: 
[[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// SIMD-ONLY11-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// SIMD-ONLY11-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// SIMD-ONLY11-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// SIMD-ONLY11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// SIMD-ONLY11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// SIMD-ONLY11-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3 +// SIMD-ONLY11-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1 +// SIMD-ONLY11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3 +// SIMD-ONLY11-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY11-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: store i64 1, i64* [[X8]], align 8 +// SIMD-ONLY11-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY11-NEXT: store i8 1, i8* [[Y9]], align 8 +// SIMD-ONLY11-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY11-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double +// SIMD-ONLY11-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0 +// SIMD-ONLY11-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8 +// SIMD-ONLY11-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0 +// SIMD-ONLY11-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 +// SIMD-ONLY11-NEXT: 
store double [[INC]], double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY11-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// SIMD-ONLY11-NEXT: ret i32 [[TMP14]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY11-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY11-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY11-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY11-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, 
i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY11-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY11-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY11-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY11-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY11-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY11-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY11-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY11-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY11-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY11-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY11-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY11-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY11-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY12-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY12-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY12-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY12-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY12-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY12-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY12-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY12-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY12-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY12-NEXT: [[Y:%.*]] = getelementptr inbounds 
[[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY12-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY12-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY12-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY12-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY12-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY12-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY12-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY12-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY12-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY12-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY12-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY12-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// SIMD-ONLY12-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY12-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY12-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], 
i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY12-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY12-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY12-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY12-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY12-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY12-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY12-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY12-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY12-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], 
%struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: store double [[ADD2]], double* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY12-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY12-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY12-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY12-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY12-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY12-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY12-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY12-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY12-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY12-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY13-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY13-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY13-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY13-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY13-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY13-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY13-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY13-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY13-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY13-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY13-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY13-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY13-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY13-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 
4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY13-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY13-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY13-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY13-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY13-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY13-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY13-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY13-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY13-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// SIMD-ONLY13-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY13-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY13-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// 
SIMD-ONLY13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY13-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY13-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY13-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY13-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY13-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY13-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY13-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY13-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY13-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: store double [[ADD2]], double* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY13-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY13-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY13-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY13-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY13-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY13-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY13-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY13-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY13-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY13-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY13-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: ret i32 [[TMP2]] +// diff --git a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp index b010d0dcea192..09c4d568bb096 100644 --- a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp @@ -1,193 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature 
--include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // expected-no-diagnostics #ifndef HEADER #define HEADER ///==========================================================================/// -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK11 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK13 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY00 %s // RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s // RUN: %clang_cc1 -DCK1 -fopenmp-simd 
-fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s #ifdef CK1 double *g; -// CK1: @g ={{.*}} global ptr -// CK1: [[SIZES00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}] -// CK1: [[TYPES00:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES01:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES01:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES02:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES03:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES03:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES04:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES04:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES05:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES05:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES06:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}] -// CK1: [[TYPES06:@.+]] = {{.+}}constant [2 x i64] [i64 288, i64 288] -// CK1-LABEL: @_Z3foo{{.*}}( template void foo(float *&lr, T *&tr) { float *l; T *t; -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:@g]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + #pragma omp target is_device_ptr(g) { ++g; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + 
#pragma omp target is_device_ptr(l) { ++l; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + #pragma omp target is_device_ptr(t) { ++t; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], -// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + #pragma omp target is_device_ptr(lr) { ++lr; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], -// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + #pragma omp target is_device_ptr(tr) { ++tr; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: 
[[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], -// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + #pragma omp target is_device_ptr(tr, lr) { ++tr; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], -// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]], - -// CK1-DAG: [[_BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 -// CK1-DAG: [[_P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 -// CK1-DAG: store ptr [[_VAL:%.+]], ptr [[_BP1]] -// CK1-DAG: store ptr [[_VAL]], ptr [[_P1]] -// CK1-DAG: [[_VAL]] = load ptr, ptr [[_ADDR:%.+]], -// CK1-DAG: [[_ADDR]] = load ptr, ptr [[_ADDR2:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]], ptr [[_VAL]]) + + #pragma omp target is_device_ptr(tr, lr) { ++tr,++lr; @@ -200,23 +83,21 @@ void bar(float *&a, int *&b) { #endif ///==========================================================================/// -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK20 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK21 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK22 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp 
-fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK23 -// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY10 %s // RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s -// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY12 %s // RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s -// SIMD-ONLY1-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s #ifdef CK2 -// CK2: [[ST:%.+]] = type { ptr, ptr } template struct ST { @@ -224,70 +105,23 @@ struct ST { double *&b; ST(double *&b) : a(0), b(b) {} - // CK2-LABEL: @{{.*}}foo{{.*}} void foo(double *&arg) { int *la = 0; -// CK2-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK2-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK2-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK2-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK2-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK2-DAG: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1:%.+]], i32 0, i32 0 -// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK2-DAG: store ptr [[THIS1]], ptr [[BP0]] -// CK2-DAG: store ptr [[A]], ptr [[P0]] + #pragma omp target is_device_ptr(a) { a++; } -// CK2-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, 
ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK2-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK2-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK2-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK2-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK2-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 -// CK2-DAG: store ptr [[SIZE:%.+]], ptr [[SARG]] -// CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK2-DAG: [[S:%[^,]+]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CK2-DAG: [[SIZE:%[^,]+]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 0 -// CK2-DAG: store i64 [[S]], ptr [[SIZE]] -// CK2-DAG: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 -// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK2-DAG: store ptr [[THIS1]], ptr [[BP0]] -// CK2-DAG: store ptr [[B]], ptr [[P0]] + #pragma omp target is_device_ptr(b) { b++; } -// CK2-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK2-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK2-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK2-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK2-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK2-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4 -// CK2-DAG: store ptr [[SIZE:%.+]], ptr [[SARG]] -// CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK2-DAG: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 -// CK2-DAG: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 -// CK2-DAG: [[S:%[^,]+]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CK2-DAG: store i64 [[S]], ptr [[SIZE:%.+]] - -// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK2-DAG: store ptr [[THIS1]], ptr [[BP0]] -// CK2-DAG: store ptr [[A8]], ptr [[TMP64:%.+]] + + #pragma omp target is_device_ptr(a, b) { a++; @@ -303,42 +137,5595 @@ void bar(double *arg){ } #endif ///==========================================================================/// -// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK30 // RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 -// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple 
i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK31 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK32 // RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK33 -// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY20 %s // RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s -// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY21 %s +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY22 %s // RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s -// SIMD-ONLY1-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY23 %s #ifdef CK3 -// CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[SZ:64|32]]] [i{{64|32}} {{8|4}}] // OMP_MAP_TARGET_PARAM = 0x20 | OMP_MAP_TO = 0x1 = 0x21 -// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 [[#0x21]]] void bar() { __attribute__((aligned(64))) double *ptr; - // CK3-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) - // CK3-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 - // CK3-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] - // CK3-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 
- // CK3-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] - // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 - // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 - // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 - // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 - // CK3-DAG: store ptr [[PTR:%.+]], ptr [[BP1]] - // CK3-DAG: store ptr [[PTR]], ptr [[P1]] - - // CK3: call void [[KERNEL:@.+]](ptr [[PTR]]) + #pragma omp target is_device_ptr(ptr) *ptr = 0; } #endif #endif +// CK1-64-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK1-64-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK1-64-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK1-64-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[L:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[T:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[_TMP19:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[_TMP26:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8 +// CK1-64-NEXT: [[_TMP33:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[_TMP34:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 8 +// CK1-64-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 8 +// CK1-64-NEXT: 
[[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 8 +// CK1-64-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK1-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK1-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK1-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK1-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK1-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK1-64-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK1-64-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK1-64-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK1-64: omp_offload.failed: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK1-64: omp_offload.cont: +// CK1-64-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 8 +// CK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 8 +// CK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 8 +// CK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 8 +// CK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 8 +// CK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8 +// CK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK1-64-NEXT: 
store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK1-64-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK1-64-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK1-64-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK1-64: omp_offload.failed5: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK1-64: omp_offload.cont6: +// CK1-64-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 8 +// CK1-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 8 +// CK1-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 8 +// CK1-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CK1-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK1-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK1-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 8 +// CK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 +// CK1-64-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 8 +// CK1-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 8 +// CK1-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CK1-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CK1-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK1-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP57]], align 
8 +// CK1-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK1-64-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK1-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK1-64-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK1-64-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK1-64-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK1-64: omp_offload.failed11: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK1-64: omp_offload.cont12: +// CK1-64-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CK1-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 8 +// CK1-64-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 8 +// CK1-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CK1-64-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK1-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK1-64-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 8 +// CK1-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 8 +// CK1-64-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 8 +// CK1-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 8 +// CK1-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, 
ptr [[TMP77]], align 8 +// CK1-64-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CK1-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK1-64-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK1-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK1-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK1-64-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK1-64-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK1-64-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK1-64-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK1-64: omp_offload.failed17: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK1-64: omp_offload.cont18: +// CK1-64-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 8 +// CK1-64-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CK1-64-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 8 +// CK1-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 8 +// CK1-64-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 8 +// CK1-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP91]], align 8 +// CK1-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK1-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK1-64-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 8 +// CK1-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP93]], ptr 
[[TMP97]], align 8 +// CK1-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 8 +// CK1-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 8 +// CK1-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP100]], align 8 +// CK1-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CK1-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK1-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK1-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK1-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK1-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK1-64-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK1-64-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK1-64-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK1-64: omp_offload.failed24: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK1-64: omp_offload.cont25: +// CK1-64-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 8 +// CK1-64-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 8 +// CK1-64-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 +// CK1-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 8 +// CK1-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 8 +// CK1-64-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP114]], align 8 +// CK1-64-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], 
i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK1-64-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK1-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 8 +// CK1-64-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 8 +// CK1-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 8 +// CK1-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 8 +// CK1-64-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP123]], align 8 +// CK1-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP124]], align 8 +// CK1-64-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK1-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK1-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK1-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK1-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK1-64-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK1-64-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK1-64-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK1-64: omp_offload.failed31: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK1-64: omp_offload.cont32: +// CK1-64-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 8 +// CK1-64-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 8 +// CK1-64-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 8 +// CK1-64-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 8 +// CK1-64-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 8 +// CK1-64-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 8 +// CK1-64-NEXT: [[TMP138:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 8 +// CK1-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 8 +// CK1-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CK1-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK1-64-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 8 +// CK1-64-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK1-64-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 8 +// CK1-64-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 1 +// CK1-64-NEXT: store ptr null, ptr [[TMP143]], align 8 +// CK1-64-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK1-64-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK1-64-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK1-64-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 8 +// CK1-64-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 8 +// CK1-64-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 8 +// CK1-64-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 8 +// CK1-64-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP152]], align 8 +// CK1-64-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP153]], align 8 +// CK1-64-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK1-64-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK1-64-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK1-64-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] 
zeroinitializer, ptr [[TMP157]], align 4 +// CK1-64-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK1-64-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK1-64-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK1-64-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK1-64: omp_offload.failed39: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK1-64: omp_offload.cont40: +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK1-64-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK1-64-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK1-64-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK1-64-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK1-64-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: 
store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK1-64-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK1-64-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CK1-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK1-64-SAME: () #[[ATTR3:[0-9]+]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: call void @__tgt_register_requires(i64 1) +// CK1-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_Z3barPd +// CK2-64-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// CK2-64-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK2-64-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK2-64-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull 
align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK2-64-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK2-64-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK2-64-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr null, ptr [[LA]], align 8 +// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CK2-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CK2-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK2-64-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CK2-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK2-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK2-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK2-64-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK2-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK2-64-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK2-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK2-64-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// 
CK2-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK2-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK2-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK2-64-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CK2-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK2-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CK2-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK2-64-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CK2-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK2-64-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK2-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK2-64-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK2-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK2-64-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK2-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK2-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK2-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK2-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK2-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK2-64-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK2-64-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK2-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK2-64-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK2-64: omp_offload.failed: +// CK2-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK2-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK2-64: omp_offload.cont: +// CK2-64-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK2-64-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 8 +// CK2-64-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK2-64-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK2-64-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK2-64-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK2-64-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK2-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 16, i1 false) +// CK2-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 8 +// CK2-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[B]], ptr [[TMP27]], align 8 +// CK2-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], 
i32 0, i32 0 +// CK2-64-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 8 +// CK2-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CK2-64-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CK2-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8 +// CK2-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK2-64-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 8 +// CK2-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1 +// CK2-64-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CK2-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK2-64-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK2-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK2-64-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK2-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK2-64-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK2-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK2-64-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 8 +// CK2-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK2-64-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8 +// CK2-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK2-64-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8 +// CK2-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK2-64-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 8 +// CK2-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK2-64-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CK2-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK2-64-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CK2-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK2-64-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK2-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK2-64-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK2-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK2-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK2-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK2-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK2-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK2-64-NEXT: store i32 
0, ptr [[TMP48]], align 4 +// CK2-64-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK2-64-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK2-64-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK2-64: omp_offload.failed6: +// CK2-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK2-64-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK2-64: omp_offload.cont7: +// CK2-64-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK2-64-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK2-64-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 8 +// CK2-64-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK2-64-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK2-64-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK2-64-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK2-64-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK2-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES13]], ptr align 8 @.offload_sizes.3, i64 24, i1 false) +// CK2-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 8 +// CK2-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[A8]], ptr [[TMP58]], align 8 +// CK2-64-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK2-64-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 8 +// CK2-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CK2-64-NEXT: store ptr null, ptr [[TMP60]], align 8 +// CK2-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 8 +// CK2-64-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK2-64-NEXT: store ptr [[A8]], ptr [[TMP62]], align 8 +// CK2-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CK2-64-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CK2-64-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 8 +// CK2-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK2-64-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 8 +// CK2-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 2 +// CK2-64-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CK2-64-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK2-64-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK2-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK2-64-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK2-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK2-64-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK2-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK2-64-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 8 +// CK2-64-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK2-64-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 8 +// CK2-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK2-64-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 8 +// CK2-64-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK2-64-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 8 +// CK2-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK2-64-NEXT: store ptr null, ptr [[TMP76]], align 8 +// CK2-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK2-64-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK2-64-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK2-64-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK2-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK2-64-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK2-64-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK2-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK2-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK2-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK2-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK2-64-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK2-64-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK2-64-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK2-64-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK2-64: omp_offload.failed15: +// CK2-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK2-64-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK2-64: omp_offload.cont16: +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK2-64-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK2-64-NEXT: [[THIS1:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK2-64-NEXT: store ptr null, ptr [[A]], align 8 +// CK2-64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK2-64-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK2-64-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK2-64-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK2-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// CK2-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK2-64-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK2-64-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK2-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 +// CK2-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK2-64-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK2-64-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK2-64-SAME: () #[[ATTR5:[0-9]+]] { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: call void @__tgt_register_requires(i64 1) +// CK2-64-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@_Z3barPd +// CK2-32-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CK2-32-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK2-32-NEXT: call void 
@_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK2-32-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 +// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK2-32-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK2-32-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK2-32-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK2-32-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: [[LA:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 4 +// CK2-32-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 4 +// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK2-32-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: store ptr null, ptr [[LA]], align 4 +// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CK2-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK2-32-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CK2-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK2-32-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CK2-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK2-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK2-32-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK2-32-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK2-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK2-32-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CK2-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK2-32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK2-32-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CK2-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK2-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK2-32-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK2-32-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK2-32-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK2-32-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK2-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK2-32-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK2-32-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK2-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK2-32-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK2-32: omp_offload.failed: +// CK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK2-32: omp_offload.cont: +// CK2-32-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK2-32-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 4 +// CK2-32-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK2-32-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK2-32-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK2-32-NEXT: [[TMP24:%.*]] = sub i64 
[[TMP22]], [[TMP23]] +// CK2-32-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK2-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 16, i1 false) +// CK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 4 +// CK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK2-32-NEXT: store ptr [[B]], ptr [[TMP27]], align 4 +// CK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK2-32-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 4 +// CK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CK2-32-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 4 +// CK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK2-32-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 4 +// CK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CK2-32-NEXT: store ptr null, ptr [[TMP32]], align 4 +// CK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK2-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK2-32-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK2-32-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK2-32-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 4 +// CK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK2-32-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4 +// CK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK2-32-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 4 +// CK2-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK2-32-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 4 +// CK2-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK2-32-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CK2-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK2-32-NEXT: store ptr null, ptr [[TMP43]], align 4 +// CK2-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK2-32-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK2-32-NEXT: [[TMP45:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK2-32-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK2-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK2-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK2-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK2-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK2-32-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK2-32-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK2-32-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK2-32-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK2-32: omp_offload.failed6: +// CK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK2-32-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK2-32: omp_offload.cont7: +// CK2-32-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK2-32-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK2-32-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 4 +// CK2-32-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK2-32-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK2-32-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK2-32-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK2-32-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK2-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES13]], ptr align 4 @.offload_sizes.3, i32 24, i1 false) +// CK2-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 4 +// CK2-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK2-32-NEXT: store ptr [[A8]], ptr [[TMP58]], align 4 +// CK2-32-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK2-32-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 4 +// CK2-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 0 +// CK2-32-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CK2-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 4 +// CK2-32-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK2-32-NEXT: store ptr [[A8]], ptr [[TMP62]], align 4 +// CK2-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 1 +// CK2-32-NEXT: store ptr null, ptr [[TMP63]], align 4 +// CK2-32-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 4 +// CK2-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// 
CK2-32-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 4 +// CK2-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 2 +// CK2-32-NEXT: store ptr null, ptr [[TMP66]], align 4 +// CK2-32-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK2-32-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK2-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK2-32-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK2-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK2-32-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK2-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK2-32-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 4 +// CK2-32-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK2-32-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 4 +// CK2-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK2-32-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 4 +// CK2-32-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK2-32-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 4 +// CK2-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK2-32-NEXT: store ptr null, ptr [[TMP76]], align 4 +// CK2-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK2-32-NEXT: store ptr null, ptr [[TMP77]], align 4 +// CK2-32-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK2-32-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK2-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK2-32-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK2-32-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK2-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK2-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK2-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK2-32-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK2-32-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK2-32-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK2-32-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK2-32: omp_offload.failed15: +// CK2-32-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK2-32-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK2-32: omp_offload.cont16: +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK2-32-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK2-32-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK2-32-NEXT: store ptr null, ptr [[A]], align 4 +// CK2-32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK2-32-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4 +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK2-32-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 +// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK2-32-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK2-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 +// CK2-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4 +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK2-32-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK2-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 +// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// CK2-32-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK2-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4 +// CK2-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK2-32-NEXT: 
[[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK2-32-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4 +// CK2-32-NEXT: ret void +// CK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK2-32-SAME: () #[[ATTR5:[0-9]+]] { +// CK2-32-NEXT: entry: +// CK2-32-NEXT: call void @__tgt_register_requires(i64 1) +// CK2-32-NEXT: ret void +// CK3-64-LABEL: define {{[^@]+}}@_Z3barv +// CK3-64-SAME: () #[[ATTR0:[0-9]+]] { +// CK3-64-NEXT: entry: +// CK3-64-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK3-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK3-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK3-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK3-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK3-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK3-64-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK3-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK3-64-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK3-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK3-64-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK3-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK3-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK3-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK3-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK3-64-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK3-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK3-64-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK3-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK3-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK3-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK3-64-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK3-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK3-64-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK3-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK3-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK3-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK3-64-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK3-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK3-64-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK3-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK3-64-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK3-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK3-64-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK3-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 10 +// CK3-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK3-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK3-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK3-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK3-64-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK3-64-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK3-64-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK3-64-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK3-64: omp_offload.failed: +// CK3-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK3-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK3-64: omp_offload.cont: +// CK3-64-NEXT: ret void +// CK3-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK3-64-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK3-64-NEXT: entry: +// CK3-64-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8 +// CK3-64-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8 +// CK3-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +// CK3-64-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// CK3-64-NEXT: ret void +// CK3-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK3-64-SAME: () #[[ATTR3:[0-9]+]] { +// CK3-64-NEXT: entry: +// CK3-64-NEXT: call void @__tgt_register_requires(i64 1) +// CK3-64-NEXT: ret void +// CK3-32-LABEL: define {{[^@]+}}@_Z3barv +// CK3-32-SAME: () #[[ATTR0:[0-9]+]] { +// CK3-32-NEXT: entry: +// CK3-32-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK3-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK3-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK3-32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK3-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK3-32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK3-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK3-32-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK3-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK3-32-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK3-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK3-32-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK3-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK3-32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK3-32-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK3-32-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK3-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK3-32-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK3-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK3-32-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK3-32-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK3-32-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK3-32-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK3-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK3-32-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK3-32-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK3-32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK3-32-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK3-32: omp_offload.failed: +// CK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK3-32: omp_offload.cont: +// CK3-32-NEXT: ret void +// CK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK3-32-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK3-32-NEXT: entry: +// CK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4 +// CK3-32-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4 +// CK3-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4 +// CK3-32-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// CK3-32-NEXT: ret void +// CK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK3-32-SAME: () #[[ATTR3:[0-9]+]] { +// CK3-32-NEXT: entry: +// CK3-32-NEXT: call void @__tgt_register_requires(i64 1) +// CK3-32-NEXT: ret void +// CK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK1-SAME: () #[[ATTR3:[0-9]+]] { +// CK1-NEXT: entry: +// CK1-NEXT: call void @__tgt_register_requires(i64 1) +// CK1-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK1-32-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK1-32-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK1-32-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[L:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[T:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[_TMP19:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[_TMP26:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4 +// CK1-32-NEXT: [[_TMP33:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[_TMP34:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 4 +// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 4 +// CK1-32-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK1-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK1-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK1-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// 
CK1-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK1-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK1-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK1-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK1-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK1-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK1-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK1-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK1-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK1-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK1-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK1-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK1-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK1-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK1-32-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK1-32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK1-32-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK1-32: omp_offload.failed: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK1-32: omp_offload.cont: +// CK1-32-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 4 +// CK1-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 4 +// CK1-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK1-32-NEXT: 
store ptr [[TMP21]], ptr [[TMP23]], align 4 +// CK1-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CK1-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK1-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK1-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 +// CK1-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 4 +// CK1-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 4 +// CK1-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 4 +// CK1-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP33]], align 4 +// CK1-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP34]], align 4 +// CK1-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK1-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK1-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK1-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK1-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK1-32-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK1-32-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK1-32-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK1-32: omp_offload.failed5: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK1-32: omp_offload.cont6: +// 
CK1-32-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 4 +// CK1-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 4 +// CK1-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 4 +// CK1-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CK1-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK1-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK1-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 +// CK1-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 +// CK1-32-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4 +// CK1-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4 +// CK1-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP54]], align 4 +// CK1-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP55]], align 4 +// CK1-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK1-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK1-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK1-32-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK1-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK1-32-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK1-32-NEXT: [[TMP62:%.*]] = 
icmp ne i32 [[TMP61]], 0 +// CK1-32-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK1-32: omp_offload.failed11: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK1-32: omp_offload.cont12: +// CK1-32-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 4 +// CK1-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 4 +// CK1-32-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 4 +// CK1-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP68]], align 4 +// CK1-32-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK1-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK1-32-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 4 +// CK1-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 4 +// CK1-32-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 4 +// CK1-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 4 +// CK1-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP77]], align 4 +// CK1-32-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP78]], align 4 +// CK1-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK1-32-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK1-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK1-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK1-32-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK1-32-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK1-32-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK1-32-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK1-32: omp_offload.failed17: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK1-32: omp_offload.cont18: +// CK1-32-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 4 +// CK1-32-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 4 +// CK1-32-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 4 +// CK1-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 4 +// CK1-32-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 4 +// CK1-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP91]], align 4 +// CK1-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK1-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK1-32-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 4 +// CK1-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 4 +// CK1-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 4 +// CK1-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 4 +// CK1-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP100]], align 4 +// CK1-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP101]], align 4 +// CK1-32-NEXT: [[TMP102:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK1-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK1-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK1-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK1-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK1-32-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK1-32-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK1-32-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK1-32: omp_offload.failed24: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK1-32: omp_offload.cont25: +// CK1-32-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 4 +// CK1-32-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 4 +// CK1-32-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 4 +// CK1-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 4 +// CK1-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 4 +// CK1-32-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP114]], align 4 +// CK1-32-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK1-32-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK1-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4 +// CK1-32-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4 +// CK1-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 4 +// 
CK1-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 4 +// CK1-32-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP123]], align 4 +// CK1-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP124]], align 4 +// CK1-32-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK1-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK1-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK1-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK1-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK1-32-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK1-32-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK1-32-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK1-32: omp_offload.failed31: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK1-32: omp_offload.cont32: +// CK1-32-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 4 +// CK1-32-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 4 +// CK1-32-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 4 +// CK1-32-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 4 +// CK1-32-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 4 +// CK1-32-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 4 +// CK1-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 4 +// CK1-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 4 +// CK1-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP140]], align 4 +// CK1-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK1-32-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 4 +// CK1-32-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK1-32-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 4 +// CK1-32-NEXT: 
[[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 1 +// CK1-32-NEXT: store ptr null, ptr [[TMP143]], align 4 +// CK1-32-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK1-32-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK1-32-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK1-32-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4 +// CK1-32-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 4 +// CK1-32-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 4 +// CK1-32-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 4 +// CK1-32-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP152]], align 4 +// CK1-32-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP153]], align 4 +// CK1-32-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK1-32-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK1-32-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK1-32-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK1-32-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK1-32-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK1-32-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK1-32-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK1-32: omp_offload.failed39: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK1-32: 
omp_offload.cont40: +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK1-32-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK1-32-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK1-32-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK1-32-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK1-32-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK1-32-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// 
CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK1-32-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CK1-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK1-32-SAME: () #[[ATTR3:[0-9]+]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: call void @__tgt_register_requires(i64 1) +// CK1-32-NEXT: ret void +// CK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK2-SAME: () #[[ATTR5:[0-9]+]] { +// CK2-NEXT: entry: +// CK2-NEXT: call void @__tgt_register_requires(i64 1) +// CK2-NEXT: ret void +// CK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK3-SAME: () #[[ATTR3:[0-9]+]] { +// CK3-NEXT: entry: +// CK3-NEXT: call void @__tgt_register_requires(i64 1) +// CK3-NEXT: ret void +// CK10-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK10-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CK10-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK10-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK10-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK10-NEXT: entry: +// CK10-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[L:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[T:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[_TMP19:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[_TMP26:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[_TMP33:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[_TMP34:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// CK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK10-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK10-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK10-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK10-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK10-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK10-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK10-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK10-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK10: omp_offload.failed: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK10: omp_offload.cont: +// CK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 8 +// CK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 8 +// CK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 8 +// CK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 8 +// CK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 8 +// CK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8 +// CK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK10-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK10-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK10-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK10-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK10-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK10: omp_offload.failed5: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK10: omp_offload.cont6: +// CK10-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 8 +// CK10-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 8 +// CK10-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 8 +// CK10-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CK10-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK10-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK10-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK10-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK10-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 8 +// CK10-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 +// CK10-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 8 +// CK10-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 8 +// CK10-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CK10-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CK10-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK10-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK10-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK10-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK10-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK10-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK10-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK10-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK10: omp_offload.failed11: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK10: omp_offload.cont12: +// CK10-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CK10-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 8 +// CK10-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 8 +// CK10-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CK10-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK10-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK10-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK10-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK10-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 8 +// CK10-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 8 +// CK10-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 8 +// CK10-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 8 +// CK10-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK10-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CK10-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK10-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK10-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK10-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK10-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK10-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK10-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK10-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK10: omp_offload.failed17: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK10: omp_offload.cont18: +// CK10-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 8 +// CK10-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CK10-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 8 +// CK10-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 
8 +// CK10-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 8 +// CK10-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP91]], align 8 +// CK10-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK10-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK10-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK10-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK10-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 8 +// CK10-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 8 +// CK10-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 8 +// CK10-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 8 +// CK10-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP100]], align 8 +// CK10-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CK10-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK10-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK10-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK10-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK10-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK10-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK10-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK10-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK10: omp_offload.failed24: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK10: omp_offload.cont25: +// CK10-NEXT: 
[[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 8 +// CK10-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 8 +// CK10-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 +// CK10-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 8 +// CK10-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 8 +// CK10-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP114]], align 8 +// CK10-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK10-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK10-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK10-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK10-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 8 +// CK10-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 8 +// CK10-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 8 +// CK10-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 8 +// CK10-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP123]], align 8 +// CK10-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP124]], align 8 +// CK10-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK10-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK10-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK10-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK10-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK10-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK10-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK10-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK10: omp_offload.failed31: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK10: omp_offload.cont32: +// CK10-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 8 +// CK10-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 8 +// CK10-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 8 +// CK10-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 8 +// CK10-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 8 +// CK10-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 8 +// CK10-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 8 +// CK10-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 8 +// CK10-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CK10-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK10-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 8 +// CK10-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK10-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 8 +// CK10-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 1 +// CK10-NEXT: store ptr null, ptr [[TMP143]], align 8 +// CK10-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK10-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK10-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK10-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK10-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK10-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 8 +// CK10-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 8 +// CK10-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 8 +// CK10-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 8 +// CK10-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP152]], align 8 +// CK10-NEXT: 
[[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP153]], align 8 +// CK10-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK10-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK10-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK10-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK10-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK10-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK10-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK10-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK10: omp_offload.failed39: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK10: omp_offload.cont40: +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK10-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK10-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK10-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK10-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// 
CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK10-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK10-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK10-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK10-SAME: () #[[ATTR3:[0-9]+]] { +// CK10-NEXT: entry: +// CK10-NEXT: call void @__tgt_register_requires(i64 1) +// CK10-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK11-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// 
CK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK11-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK11-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK11-NEXT: entry: +// CK11-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[L:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[T:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[_TMP19:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[_TMP26:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[_TMP33:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[_TMP34:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// CK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// 
CK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK11-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK11-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK11: omp_offload.failed: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK11: omp_offload.cont: +// CK11-NEXT: [[TMP21:%.*]] = load 
ptr, ptr [[L]], align 8 +// CK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 8 +// CK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 8 +// CK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 8 +// CK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 8 +// CK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8 +// CK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK11-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK11-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK11: omp_offload.failed5: +// CK11-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK11: omp_offload.cont6: +// CK11-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 8 +// CK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 8 +// CK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 8 +// CK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 8 +// CK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 +// CK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 8 +// CK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 8 +// CK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK11-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK11-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK11-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK11-NEXT: [[TMP62:%.*]] 
= icmp ne i32 [[TMP61]], 0 +// CK11-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK11: omp_offload.failed11: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK11: omp_offload.cont12: +// CK11-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 8 +// CK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 8 +// CK11-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CK11-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK11-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK11-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK11-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK11-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 8 +// CK11-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 8 +// CK11-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 8 +// CK11-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 8 +// CK11-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK11-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CK11-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK11-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK11-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK11-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK11-NEXT: 
[[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK11-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK11-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK11-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK11: omp_offload.failed17: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK11: omp_offload.cont18: +// CK11-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 8 +// CK11-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CK11-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 8 +// CK11-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 8 +// CK11-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 8 +// CK11-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP91]], align 8 +// CK11-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK11-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK11-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK11-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK11-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 8 +// CK11-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 8 +// CK11-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 8 +// CK11-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 8 +// CK11-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP100]], align 8 +// CK11-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CK11-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK11-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK11-NEXT: 
[[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK11-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK11-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK11-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK11-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK11-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK11: omp_offload.failed24: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK11: omp_offload.cont25: +// CK11-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 8 +// CK11-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 8 +// CK11-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 +// CK11-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 8 +// CK11-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 8 +// CK11-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP114]], align 8 +// CK11-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK11-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK11-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK11-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK11-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 8 +// CK11-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 8 +// CK11-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 8 +// CK11-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 8 +// CK11-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP123]], align 8 +// CK11-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 
0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP124]], align 8 +// CK11-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK11-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK11-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK11-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK11-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK11-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK11-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK11-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK11: omp_offload.failed31: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK11: omp_offload.cont32: +// CK11-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 8 +// CK11-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 8 +// CK11-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 8 +// CK11-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 8 +// CK11-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 8 +// CK11-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 8 +// CK11-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 8 +// CK11-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 8 +// CK11-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CK11-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK11-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 8 +// CK11-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK11-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 8 +// CK11-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 1 +// CK11-NEXT: store ptr null, ptr [[TMP143]], align 8 +// CK11-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK11-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK11-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK11-NEXT: [[TMP147:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK11-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK11-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 8 +// CK11-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 8 +// CK11-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 8 +// CK11-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 8 +// CK11-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP152]], align 8 +// CK11-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP153]], align 8 +// CK11-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK11-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK11-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK11-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK11-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK11-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK11-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK11-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK11: omp_offload.failed39: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK11: omp_offload.cont40: +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK11-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK11-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: 
+// CK11-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK11-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK11-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK11-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK11-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK11-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: store 
ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK11-SAME: () #[[ATTR3:[0-9]+]] { +// CK11-NEXT: entry: +// CK11-NEXT: call void @__tgt_register_requires(i64 1) +// CK11-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK12-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CK12-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK12-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK12-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK12-NEXT: entry: +// CK12-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[L:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[T:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[_TMP19:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: 
[[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[_TMP26:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[_TMP33:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[_TMP34:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// CK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK12-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK12-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK12-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK12-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK12-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK12-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK12-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK12-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK12-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK12-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK12-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK12: omp_offload.failed: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK12: omp_offload.cont: +// CK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 4 +// CK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 4 +// CK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 4 +// CK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK12-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 +// CK12-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 4 +// CK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 4 +// CK12-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 4 +// CK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP33]], align 4 +// CK12-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP34]], align 4 +// CK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK12-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK12-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK12-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK12-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK12-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK12-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK12-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK12: omp_offload.failed5: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK12: omp_offload.cont6: +// CK12-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 4 +// CK12-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 4 +// CK12-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 4 +// CK12-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CK12-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK12-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK12-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK12-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK12-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 +// CK12-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 +// CK12-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4 +// CK12-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4 +// CK12-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP54]], align 4 +// CK12-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP55]], align 4 +// CK12-NEXT: [[TMP56:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK12-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK12-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK12-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK12-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK12-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK12-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK12-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK12: omp_offload.failed11: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK12: omp_offload.cont12: +// CK12-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 4 +// CK12-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 4 +// CK12-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 4 +// CK12-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP68]], align 4 +// CK12-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK12-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK12-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK12-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK12-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 4 +// CK12-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 4 +// CK12-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 4 +// CK12-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 4 +// CK12-NEXT: 
[[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP77]], align 4 +// CK12-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP78]], align 4 +// CK12-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK12-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK12-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK12-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK12-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK12-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK12-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK12-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK12: omp_offload.failed17: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK12: omp_offload.cont18: +// CK12-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 4 +// CK12-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 4 +// CK12-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 4 +// CK12-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 4 +// CK12-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 4 +// CK12-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP91]], align 4 +// CK12-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK12-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK12-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK12-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK12-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 4 +// CK12-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 4 +// CK12-NEXT: 
[[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 4 +// CK12-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 4 +// CK12-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP100]], align 4 +// CK12-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP101]], align 4 +// CK12-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK12-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK12-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK12-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK12-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK12-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK12-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK12-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK12: omp_offload.failed24: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK12: omp_offload.cont25: +// CK12-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 4 +// CK12-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 4 +// CK12-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 4 +// CK12-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 4 +// CK12-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 4 +// CK12-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP114]], align 4 +// CK12-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK12-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK12-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK12-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// 
CK12-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK12-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4 +// CK12-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4 +// CK12-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 4 +// CK12-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 4 +// CK12-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP123]], align 4 +// CK12-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP124]], align 4 +// CK12-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK12-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK12-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK12-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK12-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK12-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK12-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK12-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK12: omp_offload.failed31: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK12: omp_offload.cont32: +// CK12-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 4 +// CK12-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 4 +// CK12-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 4 +// CK12-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 4 +// CK12-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 4 +// CK12-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 4 +// CK12-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 4 +// CK12-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP135]], ptr [[TMP139]], 
align 4 +// CK12-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP140]], align 4 +// CK12-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK12-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 4 +// CK12-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK12-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 4 +// CK12-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 1 +// CK12-NEXT: store ptr null, ptr [[TMP143]], align 4 +// CK12-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK12-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK12-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK12-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK12-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK12-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4 +// CK12-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 4 +// CK12-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 4 +// CK12-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 4 +// CK12-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP152]], align 4 +// CK12-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP153]], align 4 +// CK12-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK12-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK12-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK12-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK12-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK12-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK12-NEXT: [[TMP160:%.*]] = 
icmp ne i32 [[TMP159]], 0 +// CK12-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK12: omp_offload.failed39: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK12: omp_offload.cont40: +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK12-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK12-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK12-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK12-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK12-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK12-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK12-NEXT: entry: +// CK12-NEXT: 
[[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK12-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK12-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK12-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK12-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4 +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK12-SAME: () #[[ATTR3:[0-9]+]] { +// CK12-NEXT: entry: +// CK12-NEXT: call void @__tgt_register_requires(i64 1) +// CK12-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CK13-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK13-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK13-NEXT: entry: +// CK13-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[L:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[T:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4 
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK13-NEXT: [[_TMP19:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK13-NEXT: [[_TMP26:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK13-NEXT: [[_TMP33:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[_TMP34:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 4 +// CK13-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 4 +// CK13-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK13-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// CK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK13-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK13-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK13-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK13: omp_offload.failed: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK13: omp_offload.cont: +// CK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 4 +// CK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 4 +// CK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 4 +// CK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK13-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 +// CK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 4 +// CK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 4 +// CK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 4 +// CK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP33]], align 4 +// CK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP34]], align 4 +// CK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK13-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK13-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK13-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK13-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK13-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK13-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK13-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK13: omp_offload.failed5: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK13: omp_offload.cont6: +// CK13-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 4 +// CK13-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 4 +// CK13-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 4 +// CK13-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CK13-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK13-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK13-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK13-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK13-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK13-NEXT: [[TMP50:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 +// CK13-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 +// CK13-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4 +// CK13-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4 +// CK13-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP54]], align 4 +// CK13-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP55]], align 4 +// CK13-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK13-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK13-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK13-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK13-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK13-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK13: omp_offload.failed11: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK13: omp_offload.cont12: +// CK13-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 4 +// CK13-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 4 +// CK13-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 4 +// CK13-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP68]], align 4 +// CK13-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK13-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK13-NEXT: 
[[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK13-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK13-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK13-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 4 +// CK13-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 4 +// CK13-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 4 +// CK13-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 4 +// CK13-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP77]], align 4 +// CK13-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP78]], align 4 +// CK13-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK13-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK13-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK13-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK13-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK13: omp_offload.failed17: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK13: omp_offload.cont18: +// CK13-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 4 +// CK13-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 4 +// CK13-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 4 +// CK13-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 4 +// CK13-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 4 +// 
CK13-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP91]], align 4 +// CK13-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK13-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK13-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK13-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK13-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK13-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 4 +// CK13-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 4 +// CK13-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 4 +// CK13-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 4 +// CK13-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP100]], align 4 +// CK13-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP101]], align 4 +// CK13-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK13-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK13-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK13-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK13-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK13-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK13-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK13-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK13: omp_offload.failed24: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK13: omp_offload.cont25: +// CK13-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 4 +// CK13-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], 
align 4 +// CK13-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 4 +// CK13-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 4 +// CK13-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 4 +// CK13-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP114]], align 4 +// CK13-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK13-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK13-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK13-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK13-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK13-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4 +// CK13-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4 +// CK13-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 4 +// CK13-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 4 +// CK13-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP123]], align 4 +// CK13-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP124]], align 4 +// CK13-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK13-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK13-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK13-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK13-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK13-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK13-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK13-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label 
[[OMP_OFFLOAD_CONT32:%.*]] +// CK13: omp_offload.failed31: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK13: omp_offload.cont32: +// CK13-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 4 +// CK13-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 4 +// CK13-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 4 +// CK13-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 4 +// CK13-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 4 +// CK13-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 4 +// CK13-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 4 +// CK13-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK13-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 4 +// CK13-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0 +// CK13-NEXT: store ptr null, ptr [[TMP140]], align 4 +// CK13-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK13-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 4 +// CK13-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK13-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 4 +// CK13-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 1 +// CK13-NEXT: store ptr null, ptr [[TMP143]], align 4 +// CK13-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK13-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK13-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK13-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK13-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK13-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK13-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK13-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4 +// CK13-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK13-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 4 +// CK13-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK13-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 4 +// CK13-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK13-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 4 +// CK13-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK13-NEXT: store ptr null, ptr [[TMP152]], align 4 +// CK13-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK13-NEXT: store ptr null, ptr [[TMP153]], align 4 +// CK13-NEXT: [[TMP154:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK13-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK13-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK13-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK13-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK13-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK13-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK13-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK13-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK13-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK13-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK13: omp_offload.failed39: +// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK13-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK13: omp_offload.cont40: +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK13-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK13-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK13-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK13-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK13-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 
+// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK13-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK13-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK13-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK13-NEXT: entry: +// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK13-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK13-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4 +// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK13-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4 +// CK13-NEXT: ret void +// +// +// CK13-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK13-SAME: () #[[ATTR3:[0-9]+]] { +// CK13-NEXT: entry: +// CK13-NEXT: call void @__tgt_register_requires(i64 1) +// CK13-NEXT: ret void +// +// +// SIMD-ONLY00-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// SIMD-ONLY00-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY00-NEXT: entry: +// SIMD-ONLY00-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// 
SIMD-ONLY00-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// SIMD-ONLY00-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// SIMD-ONLY00-NEXT: ret void +// +// +// SIMD-ONLY00-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// SIMD-ONLY00-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY00-NEXT: entry: +// SIMD-ONLY00-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[L:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[T:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 +// SIMD-ONLY00-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 8 +// SIMD-ONLY00-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 8 +// SIMD-ONLY00-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 8 +// SIMD-ONLY00-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// SIMD-ONLY00-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// SIMD-ONLY00-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8 +// SIMD-ONLY00-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 8 +// SIMD-ONLY00-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// SIMD-ONLY00-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8 +// SIMD-ONLY00-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8 +// SIMD-ONLY00-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// SIMD-ONLY00-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8 +// SIMD-ONLY00-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store 
ptr [[TMP15]], ptr [[_TMP8]], align 8 +// SIMD-ONLY00-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 8 +// SIMD-ONLY00-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY00-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// SIMD-ONLY00-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8 +// SIMD-ONLY00-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8 +// SIMD-ONLY00-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// SIMD-ONLY00-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8 +// SIMD-ONLY00-NEXT: ret void +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// SIMD-ONLY01-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// SIMD-ONLY01-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// SIMD-ONLY01-NEXT: ret void +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// SIMD-ONLY01-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[L:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[T:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 +// SIMD-ONLY01-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 8 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 8 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 8 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 8 +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8 +// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8 +// SIMD-ONLY01-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// SIMD-ONLY01-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8 +// SIMD-ONLY01-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 8 +// SIMD-ONLY01-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 8 +// SIMD-ONLY01-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// SIMD-ONLY01-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8 +// SIMD-ONLY01-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8 +// SIMD-ONLY01-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// SIMD-ONLY01-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8 +// SIMD-ONLY01-NEXT: ret void +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// SIMD-ONLY02-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY02-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// SIMD-ONLY02-NEXT: ret void +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// SIMD-ONLY02-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: 
[[LR_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[L:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[T:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: [[_TMP9:%.*]] = alloca ptr, align 4 +// SIMD-ONLY02-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 4 +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4 +// SIMD-ONLY02-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 4 +// SIMD-ONLY02-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4 +// SIMD-ONLY02-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4 +// SIMD-ONLY02-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 4 +// SIMD-ONLY02-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 4 +// SIMD-ONLY02-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4 +// SIMD-ONLY02-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR10]], ptr 
[[TMP19]], align 4 +// SIMD-ONLY02-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4 +// SIMD-ONLY02-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4 +// SIMD-ONLY02-NEXT: ret void +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// SIMD-ONLY03-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY03-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// SIMD-ONLY03-NEXT: ret void +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// SIMD-ONLY03-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[L:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[T:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP9:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 4 +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 
4 +// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 4 +// SIMD-ONLY03-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4 +// SIMD-ONLY03-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4 +// SIMD-ONLY03-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 4 +// SIMD-ONLY03-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4 +// SIMD-ONLY03-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 4 +// SIMD-ONLY03-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4 +// SIMD-ONLY03-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@_Z3barPd +// CK20-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// CK20-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK20-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK20-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK20-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK20-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define 
{{[^@]+}}@_ZN2STIdE3fooERPd +// CK20-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK20-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK20-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8 +// CK20-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK20-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 8 +// CK20-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr null, ptr [[LA]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK20-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CK20-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK20-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CK20-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK20-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CK20-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK20-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK20-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK20-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK20-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK20-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK20-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CK20-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK20-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK20-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK20-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CK20-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK20-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CK20-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK20-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CK20-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK20-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK20-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK20-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK20-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK20-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK20-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK20-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK20-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK20-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK20-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK20-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK20-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK20-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK20-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK20: omp_offload.failed: +// CK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK20-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK20: omp_offload.cont: +// CK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 8 +// CK20-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK20-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK20-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK20-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK20-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK20-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 16, i1 false) +// CK20-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 8 +// CK20-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK20-NEXT: store ptr [[B]], ptr [[TMP27]], align 8 +// CK20-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK20-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 8 +// CK20-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CK20-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CK20-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8 +// CK20-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK20-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 8 +// CK20-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 
0, i64 1 +// CK20-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CK20-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK20-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK20-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK20-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK20-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK20-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK20-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK20-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 8 +// CK20-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK20-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8 +// CK20-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK20-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8 +// CK20-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK20-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 8 +// CK20-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK20-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CK20-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK20-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CK20-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK20-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK20-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK20-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK20-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK20-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK20-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK20-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK20-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK20-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK20-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK20-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK20-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK20: omp_offload.failed6: +// CK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK20-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK20: omp_offload.cont7: +// CK20-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK20-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK20-NEXT: [[TMP51:%.*]] = 
load ptr, ptr [[B9]], align 8 +// CK20-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK20-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK20-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK20-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK20-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK20-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES13]], ptr align 8 @.offload_sizes.3, i64 24, i1 false) +// CK20-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 8 +// CK20-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK20-NEXT: store ptr [[A8]], ptr [[TMP58]], align 8 +// CK20-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK20-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 8 +// CK20-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CK20-NEXT: store ptr null, ptr [[TMP60]], align 8 +// CK20-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 8 +// CK20-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK20-NEXT: store ptr [[A8]], ptr [[TMP62]], align 8 +// CK20-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CK20-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CK20-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 8 +// CK20-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 8 +// CK20-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 2 +// CK20-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CK20-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK20-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK20-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK20-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK20-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK20-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK20-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK20-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 8 +// CK20-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK20-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 8 +// CK20-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK20-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 8 +// CK20-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK20-NEXT: store ptr 
@.offload_maptypes.4, ptr [[TMP75]], align 8 +// CK20-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK20-NEXT: store ptr null, ptr [[TMP76]], align 8 +// CK20-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK20-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK20-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK20-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK20-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK20-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK20-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK20-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK20-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK20-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK20-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK20-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK20-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK20-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK20-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK20: omp_offload.failed15: +// CK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK20-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK20: omp_offload.cont16: +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK20-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK20-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK20-NEXT: store ptr null, ptr [[A]], align 8 +// CK20-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK20-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK20-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// 
CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK20-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// CK20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK20-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 +// CK20-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK20-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK20-SAME: () #[[ATTR5:[0-9]+]] { +// CK20-NEXT: entry: +// CK20-NEXT: call void @__tgt_register_requires(i64 1) +// CK20-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_Z3barPd +// CK21-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// CK21-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK21-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK21-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK21-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK21-NEXT: call 
void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK21-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK21-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK21-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8 +// CK21-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK21-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 8 +// CK21-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr null, ptr [[LA]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK21-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK21-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK21-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK21-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK21-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK21-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CK21-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK21-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK21-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK21-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK21-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK21-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK21-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK21-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK21-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK21-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK21-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK21-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK21: omp_offload.failed: +// CK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK21-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK21: omp_offload.cont: +// CK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 8 +// CK21-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK21-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK21-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK21-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK21-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK21-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 16, i1 false) +// CK21-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 8 +// CK21-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK21-NEXT: store ptr [[B]], ptr [[TMP27]], align 8 +// CK21-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK21-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 8 +// CK21-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CK21-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CK21-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8 +// CK21-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK21-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 8 +// CK21-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1 +// CK21-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CK21-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK21-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK21-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK21-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK21-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK21-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK21-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK21-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 8 +// CK21-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK21-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8 +// CK21-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK21-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8 +// CK21-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK21-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 8 +// CK21-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK21-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CK21-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK21-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CK21-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK21-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK21-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK21-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK21-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK21-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK21-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK21-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK21-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK21-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK21-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK21-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK21-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK21: omp_offload.failed6: +// CK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK21-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK21: omp_offload.cont7: +// CK21-NEXT: [[A8:%.*]] = 
getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK21-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK21-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 8 +// CK21-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK21-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK21-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK21-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK21-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK21-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES13]], ptr align 8 @.offload_sizes.3, i64 24, i1 false) +// CK21-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 8 +// CK21-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK21-NEXT: store ptr [[A8]], ptr [[TMP58]], align 8 +// CK21-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK21-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 8 +// CK21-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CK21-NEXT: store ptr null, ptr [[TMP60]], align 8 +// CK21-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 8 +// CK21-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK21-NEXT: store ptr [[A8]], ptr [[TMP62]], align 8 +// CK21-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CK21-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CK21-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 8 +// CK21-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 8 +// CK21-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 2 +// CK21-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CK21-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK21-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK21-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK21-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK21-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK21-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK21-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK21-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 8 +// CK21-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK21-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 8 +// CK21-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK21-NEXT: store ptr 
[[TMP69]], ptr [[TMP74]], align 8 +// CK21-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK21-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 8 +// CK21-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK21-NEXT: store ptr null, ptr [[TMP76]], align 8 +// CK21-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK21-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK21-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK21-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK21-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK21-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK21-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK21-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK21-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK21-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK21-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK21-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK21-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK21-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK21-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK21: omp_offload.failed15: +// CK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK21-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK21: omp_offload.cont16: +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK21-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK21-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK21-NEXT: store ptr null, ptr [[A]], align 8 +// CK21-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK21-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK21-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK21-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK21-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// CK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK21-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 +// CK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK21-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK21-SAME: () #[[ATTR5:[0-9]+]] { +// CK21-NEXT: entry: +// CK21-NEXT: call void @__tgt_register_requires(i64 1) +// CK21-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@_Z3barPd +// CK22-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK22-NEXT: entry: +// CK22-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CK22-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK22-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK22-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: 
store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK22-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK22-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK22-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[LA:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK22-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK22-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 4 +// CK22-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK22-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 4 +// CK22-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 4 +// CK22-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: store ptr null, ptr [[LA]], align 4 +// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK22-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CK22-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK22-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CK22-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK22-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CK22-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK22-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK22-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK22-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK22-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK22-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK22-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK22-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CK22-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK22-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK22-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK22-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CK22-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK22-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CK22-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK22-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CK22-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK22-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK22-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK22-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK22-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK22-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK22-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK22-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK22-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK22-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK22-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK22-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK22-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK22-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK22-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK22: omp_offload.failed: +// CK22-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK22-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK22: omp_offload.cont: +// CK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK22-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 4 +// CK22-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK22-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK22-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK22-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK22-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK22-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 16, i1 false) +// CK22-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 4 +// CK22-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK22-NEXT: store ptr [[B]], ptr [[TMP27]], align 4 +// CK22-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK22-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 4 +// CK22-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CK22-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CK22-NEXT: [[TMP30:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 4 +// CK22-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK22-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 4 +// CK22-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CK22-NEXT: store ptr null, ptr [[TMP32]], align 4 +// CK22-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK22-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK22-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK22-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK22-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK22-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK22-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK22-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK22-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 4 +// CK22-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK22-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4 +// CK22-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK22-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 4 +// CK22-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK22-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 4 +// CK22-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK22-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CK22-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK22-NEXT: store ptr null, ptr [[TMP43]], align 4 +// CK22-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK22-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK22-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK22-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK22-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK22-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK22-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK22-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK22-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK22-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK22-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK22-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK22-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK22: omp_offload.failed6: +// CK22-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK22-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK22: omp_offload.cont7: +// CK22-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK22-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK22-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 4 +// CK22-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK22-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK22-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK22-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK22-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK22-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES13]], ptr align 4 @.offload_sizes.3, i32 24, i1 false) +// CK22-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 4 +// CK22-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK22-NEXT: store ptr [[A8]], ptr [[TMP58]], align 4 +// CK22-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK22-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 4 +// CK22-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 0 +// CK22-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CK22-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 4 +// CK22-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK22-NEXT: store ptr [[A8]], ptr [[TMP62]], align 4 +// CK22-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 1 +// CK22-NEXT: store ptr null, ptr [[TMP63]], align 4 +// CK22-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 4 +// CK22-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK22-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 4 +// CK22-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 2 +// CK22-NEXT: store ptr null, ptr [[TMP66]], align 4 +// CK22-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK22-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK22-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK22-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK22-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK22-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK22-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK22-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK22-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 4 +// CK22-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK22-NEXT: store 
ptr [[TMP68]], ptr [[TMP73]], align 4 +// CK22-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK22-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 4 +// CK22-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK22-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 4 +// CK22-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK22-NEXT: store ptr null, ptr [[TMP76]], align 4 +// CK22-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK22-NEXT: store ptr null, ptr [[TMP77]], align 4 +// CK22-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK22-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK22-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK22-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK22-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK22-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK22-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK22-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK22-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK22-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK22-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK22-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK22-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK22: omp_offload.failed15: +// CK22-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK22-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK22: omp_offload.cont16: +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK22-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK22-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK22-NEXT: store ptr null, ptr [[A]], align 4 +// CK22-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK22-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4 +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK22-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 
+// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK22-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 +// CK22-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4 +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK22-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// CK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK22-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4 +// CK22-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK22-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK22-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4 +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK22-SAME: () #[[ATTR5:[0-9]+]] { +// CK22-NEXT: entry: +// CK22-NEXT: call void @__tgt_register_requires(i64 1) +// CK22-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@_Z3barPd +// CK23-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK23-NEXT: entry: +// CK23-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CK23-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK23-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK23-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK23-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK23-NEXT: entry: +// 
CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK23-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK23-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK23-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK23-NEXT: entry: +// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: [[LA:%.*]] = alloca ptr, align 4 +// CK23-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK23-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK23-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 4 +// CK23-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK23-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 4 +// CK23-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 4 +// CK23-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK23-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: store ptr null, ptr [[LA]], align 4 +// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK23-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK23-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK23-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK23-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK23-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CK23-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK23-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK23-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK23-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK23-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK23-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK23-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK23-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK23-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK23-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK23-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK23-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK23-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK23-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK23: omp_offload.failed: +// CK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK23-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK23: omp_offload.cont: +// CK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 4 +// CK23-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK23-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK23-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK23-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK23-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK23-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 16, i1 false) +// CK23-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 4 +// CK23-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK23-NEXT: store ptr [[B]], ptr [[TMP27]], align 4 +// CK23-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK23-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 4 +// CK23-NEXT: [[TMP29:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CK23-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CK23-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 4 +// CK23-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK23-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 4 +// CK23-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CK23-NEXT: store ptr null, ptr [[TMP32]], align 4 +// CK23-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK23-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK23-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK23-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK23-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK23-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK23-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK23-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK23-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 4 +// CK23-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK23-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4 +// CK23-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK23-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 4 +// CK23-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK23-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 4 +// CK23-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK23-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CK23-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK23-NEXT: store ptr null, ptr [[TMP43]], align 4 +// CK23-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK23-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK23-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK23-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK23-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK23-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK23-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK23-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK23-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK23-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK23-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK23-NEXT: [[TMP50:%.*]] = icmp ne 
i32 [[TMP49]], 0 +// CK23-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK23: omp_offload.failed6: +// CK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK23-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK23: omp_offload.cont7: +// CK23-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK23-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK23-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 4 +// CK23-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK23-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK23-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK23-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK23-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK23-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES13]], ptr align 4 @.offload_sizes.3, i32 24, i1 false) +// CK23-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 4 +// CK23-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK23-NEXT: store ptr [[A8]], ptr [[TMP58]], align 4 +// CK23-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK23-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 4 +// CK23-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 0 +// CK23-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CK23-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 4 +// CK23-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK23-NEXT: store ptr [[A8]], ptr [[TMP62]], align 4 +// CK23-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 1 +// CK23-NEXT: store ptr null, ptr [[TMP63]], align 4 +// CK23-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 4 +// CK23-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK23-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 4 +// CK23-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 2 +// CK23-NEXT: store ptr null, ptr [[TMP66]], align 4 +// CK23-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK23-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK23-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK23-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK23-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK23-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK23-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK23-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK23-NEXT: store ptr 
[[TMP67]], ptr [[TMP72]], align 4 +// CK23-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK23-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 4 +// CK23-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK23-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 4 +// CK23-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK23-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 4 +// CK23-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK23-NEXT: store ptr null, ptr [[TMP76]], align 4 +// CK23-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK23-NEXT: store ptr null, ptr [[TMP77]], align 4 +// CK23-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK23-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK23-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK23-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK23-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK23-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK23-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK23-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK23-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK23-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK23-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK23-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK23-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK23: omp_offload.failed15: +// CK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK23-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK23: omp_offload.cont16: +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK23-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK23-NEXT: entry: +// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK23-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK23-NEXT: store ptr null, ptr [[A]], align 4 +// CK23-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK23-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4 +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK23-SAME: (ptr 
noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK23-NEXT: entry: +// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK23-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK23-NEXT: entry: +// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 +// CK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4 +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK23-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK23-NEXT: entry: +// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4 +// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// CK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4 +// CK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK23-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK23-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4 +// CK23-NEXT: ret void +// +// +// CK23-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK23-SAME: () #[[ATTR5:[0-9]+]] { +// CK23-NEXT: entry: +// CK23-NEXT: call void @__tgt_register_requires(i64 1) +// CK23-NEXT: ret void +// +// +// SIMD-ONLY10-LABEL: define {{[^@]+}}@_Z3barPd +// SIMD-ONLY10-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY10-NEXT: entry: +// SIMD-ONLY10-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// SIMD-ONLY10-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY10-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// SIMD-ONLY10-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY10-NEXT: ret void 
+// +// +// SIMD-ONLY10-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// SIMD-ONLY10-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY10-NEXT: entry: +// SIMD-ONLY10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY10-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// SIMD-ONLY10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// SIMD-ONLY10-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// SIMD-ONLY10-NEXT: ret void +// +// +// SIMD-ONLY10-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// SIMD-ONLY10-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY10-NEXT: entry: +// SIMD-ONLY10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY10-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY10-NEXT: store ptr null, ptr [[LA]], align 8 +// SIMD-ONLY10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 8 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// SIMD-ONLY10-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// SIMD-ONLY10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8 +// SIMD-ONLY10-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8 +// SIMD-ONLY10-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8 +// SIMD-ONLY10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// SIMD-ONLY10-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8 +// SIMD-ONLY10-NEXT: ret void +// +// +// SIMD-ONLY10-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// SIMD-ONLY10-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY10-NEXT: entry: +// SIMD-ONLY10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], 
align 8 +// SIMD-ONLY10-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// SIMD-ONLY10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY10-NEXT: store ptr null, ptr [[A]], align 8 +// SIMD-ONLY10-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// SIMD-ONLY10-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// SIMD-ONLY10-NEXT: ret void +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3barPd +// SIMD-ONLY11-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// SIMD-ONLY11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY11-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// SIMD-ONLY11-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY11-NEXT: ret void +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// SIMD-ONLY11-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// SIMD-ONLY11-NEXT: ret void +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store ptr null, ptr [[LA]], align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 8 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// SIMD-ONLY11-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// 
SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8 +// SIMD-ONLY11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8 +// SIMD-ONLY11-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// SIMD-ONLY11-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8 +// SIMD-ONLY11-NEXT: ret void +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: store ptr null, ptr [[A]], align 8 +// SIMD-ONLY11-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// SIMD-ONLY11-NEXT: ret void +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3barPd +// SIMD-ONLY12-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// SIMD-ONLY12-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY12-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// SIMD-ONLY12-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY12-NEXT: ret void +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY12-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +// SIMD-ONLY12-NEXT: ret void +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: [[LA:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store ptr null, ptr [[LA]], align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 4 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// SIMD-ONLY12-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4 +// SIMD-ONLY12-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4 +// SIMD-ONLY12-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// SIMD-ONLY12-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4 +// SIMD-ONLY12-NEXT: ret void +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: store ptr null, ptr [[A]], align 4 +// SIMD-ONLY12-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store ptr 
[[TMP0]], ptr [[B2]], align 4 +// SIMD-ONLY12-NEXT: ret void +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3barPd +// SIMD-ONLY13-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// SIMD-ONLY13-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY13-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// SIMD-ONLY13-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY13-NEXT: ret void +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY13-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +// SIMD-ONLY13-NEXT: ret void +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: [[LA:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store ptr null, ptr [[LA]], align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 4 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4 +// SIMD-ONLY13-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4 +// SIMD-ONLY13-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR4:%.*]] = 
getelementptr inbounds double, ptr [[TMP3]], i32 1 +// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4 +// SIMD-ONLY13-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// SIMD-ONLY13-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1 +// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4 +// SIMD-ONLY13-NEXT: ret void +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: store ptr null, ptr [[A]], align 4 +// SIMD-ONLY13-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4 +// SIMD-ONLY13-NEXT: ret void +// +// +// CK30-LABEL: define {{[^@]+}}@_Z3barv +// CK30-SAME: () #[[ATTR0:[0-9]+]] { +// CK30-NEXT: entry: +// CK30-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK30-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK30-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK30-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK30-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK30-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK30-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK30-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK30-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK30-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK30-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK30-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK30-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK30-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK30-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK30-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK30-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK30-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK30-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK30-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK30-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK30-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK30-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK30-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK30-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK30-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK30-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK30-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK30-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK30-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK30-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK30-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK30-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK30-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK30-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK30-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK30-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK30-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK30-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK30-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK30-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK30-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK30-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK30: omp_offload.failed: +// CK30-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK30-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK30: omp_offload.cont: +// CK30-NEXT: ret void +// +// +// CK30-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK30-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK30-NEXT: entry: +// CK30-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8 +// CK30-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8 +// CK30-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +// CK30-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// CK30-NEXT: ret void +// +// +// CK30-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK30-SAME: () #[[ATTR3:[0-9]+]] { +// CK30-NEXT: entry: +// CK30-NEXT: call void @__tgt_register_requires(i64 1) +// CK30-NEXT: ret void +// +// +// CK31-LABEL: define {{[^@]+}}@_Z3barv +// CK31-SAME: () #[[ATTR0:[0-9]+]] { +// CK31-NEXT: entry: +// CK31-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK31-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK31-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK31-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK31-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK31-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK31-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK31-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK31-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK31-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK31-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK31-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK31-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK31-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK31-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK31-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK31-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK31-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK31-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK31-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK31-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK31-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK31-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK31-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK31-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK31-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK31-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK31-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK31-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK31-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK31-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK31-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK31-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK31-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK31-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK31-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK31-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK31-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK31-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK31-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK31-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK31-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK31-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK31: omp_offload.failed: +// CK31-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK31-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK31: omp_offload.cont: +// CK31-NEXT: ret void +// +// +// CK31-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// 
CK31-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK31-NEXT: entry: +// CK31-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8 +// CK31-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8 +// CK31-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +// CK31-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// CK31-NEXT: ret void +// +// +// CK31-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK31-SAME: () #[[ATTR3:[0-9]+]] { +// CK31-NEXT: entry: +// CK31-NEXT: call void @__tgt_register_requires(i64 1) +// CK31-NEXT: ret void +// +// +// CK32-LABEL: define {{[^@]+}}@_Z3barv +// CK32-SAME: () #[[ATTR0:[0-9]+]] { +// CK32-NEXT: entry: +// CK32-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK32-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK32-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK32-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK32-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK32-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK32-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK32-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK32-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK32-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK32-NEXT: store i64 0, ptr [[TMP15]], align 8 +// 
CK32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK32-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK32-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK32-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK32: omp_offload.failed: +// CK32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK32: omp_offload.cont: +// CK32-NEXT: ret void +// +// +// CK32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK32-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK32-NEXT: entry: +// CK32-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4 +// CK32-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4 +// CK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4 +// CK32-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// CK32-NEXT: ret void +// +// +// CK32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK32-SAME: () #[[ATTR3:[0-9]+]] { +// CK32-NEXT: entry: +// CK32-NEXT: call void @__tgt_register_requires(i64 1) +// CK32-NEXT: ret void +// +// +// CK33-LABEL: define {{[^@]+}}@_Z3barv +// CK33-SAME: () #[[ATTR0:[0-9]+]] { +// CK33-NEXT: entry: +// CK33-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK33-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK33-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK33-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK33-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK33-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK33-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK33-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK33-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK33-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK33-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK33-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK33-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK33-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// 
CK33-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK33-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK33-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK33-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK33-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK33-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK33-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK33-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK33-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK33-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK33-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK33-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK33-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK33-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK33-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK33-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK33-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK33-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK33-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK33-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK33-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK33-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK33-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK33: omp_offload.failed: +// CK33-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK33-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK33: omp_offload.cont: +// CK33-NEXT: ret void +// +// +// CK33-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK33-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK33-NEXT: entry: +// CK33-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4 +// CK33-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4 +// CK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4 +// CK33-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// CK33-NEXT: ret void +// +// +// CK33-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK33-SAME: () #[[ATTR3:[0-9]+]] { +// CK33-NEXT: entry: +// CK33-NEXT: call void @__tgt_register_requires(i64 1) +// CK33-NEXT: ret void +// +// +// SIMD-ONLY20-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY20-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY20-NEXT: entry: +// SIMD-ONLY20-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY20-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// SIMD-ONLY20-NEXT: ret void +// +// +// SIMD-ONLY21-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY21-SAME: () #[[ATTR0:[0-9]+]] { +// 
SIMD-ONLY21-NEXT: entry: +// SIMD-ONLY21-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY21-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// SIMD-ONLY21-NEXT: ret void +// +// +// SIMD-ONLY22-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY22-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY22-NEXT: entry: +// SIMD-ONLY22-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY22-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// SIMD-ONLY22-NEXT: ret void +// +// +// SIMD-ONLY23-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY23-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY23-NEXT: entry: +// SIMD-ONLY23-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY23-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// SIMD-ONLY23-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp index c1617e16b402c..e8f9512114f7f 100644 --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -168,46 +168,42 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[G1]], i64 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP3]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK1-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK1-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK1-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -230,46 +226,42 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 -// CHECK3-SAME: (ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[G_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_CASTED]], align 4 // CHECK3-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[G1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i32 [[TMP1]], i32 [[TMP3]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -637,14 +629,16 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 -// CHECK9-NEXT: 
[[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 -// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 -// CHECK9-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -653,121 +647,127 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 signext 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 signext 3) -// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 2, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 4, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.5, ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP19]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 0, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK9-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP24]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP25]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK9-NEXT: store i32 0, ptr [[TMP26]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store ptr 
[[VEC]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP16]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 4, ptr [[TMP17]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.5, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x 
i32] zeroinitializer, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP28]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i64 [[TMP1]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP29]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK9-NEXT: store i32 2, ptr [[TMP34]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP33]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.8, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP39]], align 8 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP40]], align 8 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP41]], align 8 -// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK9-NEXT: store i64 
0, ptr [[TMP42]], align 8 -// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 -// CHECK9-NEXT: store i64 0, ptr [[TMP43]], align 8 -// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP44]], align 4 -// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP45]], align 4 -// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 -// CHECK9-NEXT: store i32 0, ptr [[TMP46]], align 4 -// CHECK9-NEXT: [[TMP47:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK9-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK9-NEXT: br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] -// CHECK9: omp_offload.failed5: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(ptr [[T_VAR]]) #[[ATTR4]] -// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT6]] -// CHECK9: omp_offload.cont6: +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP38]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP39]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP36]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.8, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// 
CHECK9-NEXT: store ptr null, ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP46]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP47]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP49]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP50]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK9-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK9-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK9: omp_offload.failed6: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(i64 [[TMP32]]) #[[ATTR4]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK9: omp_offload.cont7: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[OMP_OFFLOAD_CONT6]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP53]], [[OMP_OFFLOAD_CONT7]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done7: +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP50]] +// CHECK9-NEXT: [[TMP54:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP54]] // // // CHECK9-LABEL: define 
{{[^@]+}}@_ZN1SIfEC2Ev @@ -877,89 +877,85 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75 -// CHECK9-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP1]], ptr [[T_VAR1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i64 8, i1 false) +// 
CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], 
align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // @@ -989,31 +985,27 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK9-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[T_VAR1]]) +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i64 [[TMP1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1438,14 +1430,16 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4 -// CHECK11-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4 -// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4 -// CHECK11-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK11-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) @@ -1454,121 +1448,127 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x ptr], 
ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 2, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 4, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.5, ptr [[TMP18]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP22]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK11-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP25]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK11-NEXT: store i32 0, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 
0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 4, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.5, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP25]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32 [[TMP1]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP30]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK11-NEXT: store i32 2, ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP33]], ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.8, ptr [[TMP38]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP39]], align 4 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP40]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP42]], align 8 -// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 -// CHECK11-NEXT: store i64 0, ptr [[TMP43]], align 8 -// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP44]], align 4 -// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP45]], align 4 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 -// CHECK11-NEXT: store i32 0, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK11-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK11-NEXT: br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] -// CHECK11: omp_offload.failed5: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(ptr [[T_VAR]]) #[[ATTR4]] -// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT6]] -// CHECK11: omp_offload.cont6: +// 
CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP34]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP36]], ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.8, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP43]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP44]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP46]], align 8 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP47]], align 8 +// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP49]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP50]], align 4 +// CHECK11-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK11-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK11-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK11: omp_offload.failed6: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(i32 [[TMP32]]) #[[ATTR4]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK11: omp_offload.cont7: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[OMP_OFFLOAD_CONT6]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP53]], [[OMP_OFFLOAD_CONT7]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done7: +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP50]] +// CHECK11-NEXT: [[TMP54:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP54]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1678,89 +1678,85 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75 -// CHECK11-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: 
store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP1]], ptr [[T_VAR1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK11-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK11-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK11-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK11-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], 
align 4 // CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done4: -// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x 
i32], ptr [[VEC2]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i32 4, i1 false) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i32 4, i1 false) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done9: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // @@ -1790,31 +1786,27 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK11-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: 
[[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[T_VAR1]]) +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i32 [[TMP1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: ret void // // diff --git a/clang/test/Sema/builtin-assume-separate-storage.c b/clang/test/Sema/builtin-assume-separate-storage.c new file mode 100644 index 0000000000000..f27d0b821d5ab --- /dev/null +++ b/clang/test/Sema/builtin-assume-separate-storage.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +void *nonconst(void); + +void test1(int *a, int *b) { + __builtin_assume_separate_storage(a, b); + // Separate storage assumptions evaluate their arguments unconditionally, like + // assume_aligned but *unlike* assume. Check that we don't warn on it. 
+ __builtin_assume_separate_storage(a, nonconst()); + __builtin_assume_separate_storage(nonconst(), a); + __builtin_assume_separate_storage(a, 3); // expected-error {{incompatible integer to pointer conversion}} + __builtin_assume_separate_storage(3, a); // expected-error {{incompatible integer to pointer conversion}} +} diff --git a/clang/test/Sema/caret-diags-complex-init.cpp b/clang/test/Sema/caret-diags-complex-init.cpp new file mode 100644 index 0000000000000..d8a1b7837a640 --- /dev/null +++ b/clang/test/Sema/caret-diags-complex-init.cpp @@ -0,0 +1,39 @@ +// RUN: not %clang_cc1 -std=c++11 -fsyntax-only -fcaret-diagnostics-max-lines 5 %s 2>&1 | FileCheck %s -strict-whitespace + + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex double gz1 = {1, 2, 3}; +//CHECK-NEXT: {{^}} ^{{$}} +_Complex double gz1 = {1, 2, 3}; + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex double dd = {1.0, 2.0, 3.0}; +//CHECK-NEXT: {{^}} ^~~{{$}} +_Complex double dd = {1.0, 2.0, 3.0}; + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex float fd = {1.0, 2.0, 3.0, 4.0, 5.0}; +//CHECK-NEXT: {{^}} ^~~{{$}} +_Complex float fd = {1.0, 2.0, 3.0, 4.0, 5.0}; + +//CHECK: {{.*}}: error: no viable conversion from 'foo' to 'double' +//CHECK-NEXT: {{^}}_Complex double ds = {f, 1.0, b}; +//CHECK-NEXT: {{^}} ^{{$}} +struct foo{}; +struct bar{}; + +foo f; +bar b; +_Complex double ds = {f, 1.0, b}; + +//CHECK: {{.*}}: error: no viable conversion from 'foo' to 'double' +//CHECK-NEXT: {{^}}_Complex double fg = {1.0, f}; +//CHECK-NEXT: {{^}} ^{{$}} +_Complex double fg = {1.0, f}; + + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex double gg = {1.0, 2.0, f}; +//CHECK-NEXT: {{^}} ^{{$}} +//CHECK-NEXT: {{^}}6 errors generated. 
+_Complex double gg = {1.0, 2.0, f}; diff --git a/clang/test/Sema/complex-init-list.c b/clang/test/Sema/complex-init-list.c index bfc6899ac235d..b8f87f57f0793 100644 --- a/clang/test/Sema/complex-init-list.c +++ b/clang/test/Sema/complex-init-list.c @@ -25,17 +25,21 @@ struct teststruct { _Complex float x; }; // Random other valid stuff -_Complex int valid2 = { 1, 2 }; // expected-warning {{complex integer}} expected-warning {{specifying real and imaginary components is an extension}} +_Complex int valid2 = { 1, 2 }; // expected-warning {{complex integer}} \ + // expected-warning {{specifying real and imaginary components is an extension}} struct teststruct valid3 = { { 1.0f, 2.0f} }; // expected-warning {{specifying real and imaginary components is an extension}} _Complex float valid4[2] = { {1.0f, 1.0f}, {1.0f, 1.0f} }; // expected-warning 2 {{specifying real and imaginary components is an extension}} // FIXME: We need some sort of warning for valid5 -_Complex float valid5 = {1.0f, 1.0fi}; // expected-warning {{imaginary constants}} expected-warning {{specifying real and imaginary components is an extension}} +_Complex float valid5 = {1.0f, 1.0fi}; // expected-warning {{imaginary constants}} \ + // expected-warning {{specifying real and imaginary components is an extension}} // Random invalid stuff struct teststruct invalid1 = { 1, 2 }; // expected-warning {{excess elements}} -_Complex float invalid2 = { 1, 2, 3 }; // expected-warning {{excess elements}} -_Complex float invalid3 = {}; // expected-error {{scalar initializer cannot be empty}} expected-warning {{GNU empty initializer}} +_Complex float invalid2 = { 1, 2, 3 }; // expected-warning {{specifying real and imaginary components is an extension}} \ + // expected-warning {{excess elements in scalar initializer}} +_Complex float invalid3 = {}; // expected-error {{scalar initializer cannot be empty}} \ + // expected-warning {{GNU empty initializer}} // Check incomplete array sizing @@ -46,3 +50,9 @@ _Complex float sizecheck2[(sizeof(sizetest2) == sizeof(*sizetest2)*3) ? 1 : -1]; // Constant-folding with init list. 
_Complex float x = 2 + (_Complex float) { 1, 2 }; // expected-warning {{specifying real and imaginary components is an extension}} + +// initialization list +_Complex double cd = {1.0, 2.0, 3.0}; // expected-warning {{specifying real and imaginary components is an extension}} \ + // expected-warning {{excess elements in scalar initializer}} +_Complex float cf = {1.1f, 2.2f, 3.3f, 4.4f}; // expected-warning {{specifying real and imaginary components is an extension}} \ + // expected-warning {{excess elements in scalar initializer}} diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp index 84d224fdc835e..67853c991ce53 100644 --- a/clang/test/SemaCXX/lambda-expressions.cpp +++ b/clang/test/SemaCXX/lambda-expressions.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -std=c++14 -Wno-unused-value -fsyntax-only -verify -verify=expected-cxx14 -fblocks %s -// RUN: %clang_cc1 -std=c++17 -Wno-unused-value -fsyntax-only -verify -fblocks %s +// RUN: %clang_cc1 -std=c++17 -Wno-unused-value -verify -ast-dump -fblocks %s | FileCheck %s namespace std { class type_info; }; @@ -704,3 +704,13 @@ static_assert([]() constexpr { }()); } // namespace GH60936 #endif + +// Call operator attributes refering to a variable should +// be properly handled after D124351 +constexpr int i = 2; +void foo() { + (void)[=][[gnu::aligned(i)]] () {}; // expected-warning{{C++2b extension}} + // CHECK: AlignedAttr + // CHECK-NEXT: ConstantExpr + // CHECK-NEXT: value: Int 2 +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp new file mode 100644 index 0000000000000..4a02bbdf71182 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +void basic_dereference() { + int tmp; + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + tmp = p[5]; + int val = *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:13-[[@LINE-1]]:14}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:15-[[@LINE-2]]:15}:"[0]" +} + +int return_method() { + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + int tmp = p[5]; + return *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:11}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"[0]" +} + +void foo(int v) { +} + +void method_invocation() { + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + + int tmp = p[5]; + + foo(*p); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:9-[[@LINE-2]]:9}:"[0]" +} + +void binary_operation() { + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + + int tmp = 
p[5]; + + int k = *p + 20; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:12}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"[0]" + +} + diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index ed38a515be270..c77aeaca90959 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -9,6 +9,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests DataflowAnalysisContextTest.cpp DataflowEnvironmentTest.cpp DebugSupportTest.cpp + LoggerTest.cpp MapLatticeTest.cpp MatchSwitchTest.cpp MultiVarConstantPropagationTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp new file mode 100644 index 0000000000000..eab37045c393e --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp @@ -0,0 +1,152 @@ +#include "TestingSupport.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" +#include + +namespace clang::dataflow::test { +namespace { + +struct TestLattice { + int Elements = 0; + int Branches = 0; + int Joins = 0; + + LatticeJoinEffect join(const TestLattice &Other) { + if (Joins < 3) { + ++Joins; + Elements += Other.Elements; + Branches += Other.Branches; + return LatticeJoinEffect::Changed; + } + return LatticeJoinEffect::Unchanged; + } + friend bool operator==(const TestLattice &LHS, const TestLattice &RHS) { + return std::tie(LHS.Elements, LHS.Branches, LHS.Joins) == + std::tie(RHS.Elements, RHS.Branches, RHS.Joins); + } +}; + +class TestAnalysis : public DataflowAnalysis { +public: + using DataflowAnalysis::DataflowAnalysis; + + static TestLattice initialElement() { return TestLattice{}; } + void transfer(const CFGElement &, TestLattice &L, Environment &E) { + E.logger().log([](llvm::raw_ostream &OS) { OS << "transfer()"; }); + ++L.Elements; + } + void transferBranch(bool Branch, const Stmt *S, TestLattice &L, + Environment &E) { + E.logger().log([&](llvm::raw_ostream &OS) { + OS << "transferBranch(" << Branch << ")"; + }); + ++L.Branches; + } +}; + +class TestLogger : public Logger { +public: + TestLogger(std::string &S) : OS(S) {} + +private: + llvm::raw_string_ostream OS; + + void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) override { + logText("beginAnalysis()"); + } + void endAnalysis() override { logText("\nendAnalysis()"); } + + void enterBlock(const CFGBlock &B) override { + OS << "\nenterBlock(" << B.BlockID << ")\n"; + } + void enterElement(const CFGElement &E) override { + // we don't want the trailing \n + std::string S; + llvm::raw_string_ostream SS(S); + E.dumpToStream(SS); + + OS << "enterElement(" << llvm::StringRef(S).trim() << ")\n"; + } + void recordState(TypeErasedDataflowAnalysisState &S) override { + const TestLattice &L = llvm::any_cast(S.Lattice.Value); + OS << "recordState(Elements=" << L.Elements << ", Branches=" << L.Branches + << ", Joins=" << L.Joins << ")\n"; + } + /// Records that the analysis state for the current block is now final. 
+ void blockConverged() override { logText("blockConverged()"); } + + void logText(llvm::StringRef Text) override { OS << Text << "\n"; } +}; + +TEST(LoggerTest, Sequence) { + const char *Code = R"cpp( +int target(bool b, int p, int q) { + return b ? p : q; +} +)cpp"; + + auto Inputs = AnalysisInputs( + Code, ast_matchers::hasName("target"), + [](ASTContext &C, Environment &) { return TestAnalysis(C); }); + std::vector Args = { + "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"}; + Inputs.ASTBuildArgs = Args; + std::string Log; + TestLogger Logger(Log); + Inputs.BuiltinOptions.Log = &Logger; + + ASSERT_THAT_ERROR(checkDataflow(std::move(Inputs), + [](const AnalysisOutputs &) {}), + llvm::Succeeded()); + + EXPECT_EQ(Log, R"(beginAnalysis() + +enterBlock(4) +recordState(Elements=0, Branches=0, Joins=0) +enterElement(b) +transfer() +recordState(Elements=1, Branches=0, Joins=0) +enterElement(b (ImplicitCastExpr, LValueToRValue, _Bool)) +transfer() +recordState(Elements=2, Branches=0, Joins=0) + +enterBlock(3) +transferBranch(0) +recordState(Elements=2, Branches=1, Joins=0) +enterElement(q) +transfer() +recordState(Elements=3, Branches=1, Joins=0) + +enterBlock(2) +transferBranch(1) +recordState(Elements=2, Branches=1, Joins=0) +enterElement(p) +transfer() +recordState(Elements=3, Branches=1, Joins=0) + +enterBlock(1) +recordState(Elements=6, Branches=2, Joins=1) +enterElement(b ? p : q) +transfer() +recordState(Elements=7, Branches=2, Joins=1) +enterElement(b ? p : q (ImplicitCastExpr, LValueToRValue, int)) +transfer() +recordState(Elements=8, Branches=2, Joins=1) +enterElement(return b ? p : q;) +transfer() +recordState(Elements=9, Branches=2, Joins=1) + +enterBlock(0) +recordState(Elements=9, Branches=2, Joins=1) + +endAnalysis() +)"); +} + +} // namespace +} // namespace clang::dataflow::test diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index bc089f141850a..ef67dc98790c0 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -389,6 +389,20 @@ checkDataflow(AnalysisInputs AI, /// `Name` must be unique in `ASTCtx`. const ValueDecl *findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name); +/// Returns the value (of type `ValueT`) for the given identifier. +/// `ValueT` must be a subclass of `Value` and must be of the appropriate type. +/// +/// Requirements: +/// +/// `Name` must be unique in `ASTCtx`. +template +ValueT &getValueForDecl(ASTContext &ASTCtx, const Environment &Env, + llvm::StringRef Name) { + const ValueDecl *VD = findValueDecl(ASTCtx, Name); + assert(VD != nullptr); + return *cast(Env.getValue(*VD, SkipPast::None)); +} + /// Creates and owns constraints which are boolean values. class ConstraintContext { public: diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 9c16335714c55..1bb772a93bda6 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -5104,4 +5104,70 @@ TEST(TransferTest, UnnamedBitfieldInitializer) { }); } +// Repro for a crash that used to occur when we call a `noreturn` function +// within one of the operands of a `&&` or `||` operator. 
+TEST(TransferTest, NoReturnFunctionInsideShortCircuitedBooleanOp) { + std::string Code = R"( + __attribute__((noreturn)) int doesnt_return(); + bool some_condition(); + void target(bool b1, bool b2) { + // Neither of these should crash. In addition, if we don't terminate the + // program, we know that the operators need to trigger the short-circuit + // logic, so `NoreturnOnRhsOfAnd` will be false and `NoreturnOnRhsOfOr` + // will be true. + bool NoreturnOnRhsOfAnd = b1 && doesnt_return() > 0; + bool NoreturnOnRhsOfOr = b2 || doesnt_return() > 0; + + // Calling a `noreturn` function on the LHS of an `&&` or `||` makes the + // entire expression unreachable. So we know that in both of the following + // cases, if `target()` terminates, the `else` branch was taken. + bool NoreturnOnLhsMakesAndUnreachable = false; + if (some_condition()) + doesnt_return() > 0 && some_condition(); + else + NoreturnOnLhsMakesAndUnreachable = true; + + bool NoreturnOnLhsMakesOrUnreachable = false; + if (some_condition()) + doesnt_return() > 0 || some_condition(); + else + NoreturnOnLhsMakesOrUnreachable = true; + + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + // Check that [[p]] is reachable with a non-false flow condition. + EXPECT_FALSE(Env.flowConditionImplies(Env.getBoolLiteralValue(false))); + + auto &B1 = getValueForDecl(ASTCtx, Env, "b1"); + EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(B1))); + + auto &NoreturnOnRhsOfAnd = + getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfAnd"); + EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(NoreturnOnRhsOfAnd))); + + auto &B2 = getValueForDecl(ASTCtx, Env, "b2"); + EXPECT_TRUE(Env.flowConditionImplies(B2)); + + auto &NoreturnOnRhsOfOr = + getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfOr"); + EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnRhsOfOr)); + + auto &NoreturnOnLhsMakesAndUnreachable = getValueForDecl( + ASTCtx, Env, "NoreturnOnLhsMakesAndUnreachable"); + EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesAndUnreachable)); + + auto &NoreturnOnLhsMakesOrUnreachable = getValueForDecl( + ASTCtx, Env, "NoreturnOnLhsMakesOrUnreachable"); + EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesOrUnreachable)); + }); +} + } // namespace diff --git a/clang/unittests/Driver/MultilibTest.cpp b/clang/unittests/Driver/MultilibTest.cpp index 2e729a5051734..6a066f6b0f5a6 100644 --- a/clang/unittests/Driver/MultilibTest.cpp +++ b/clang/unittests/Driver/MultilibTest.cpp @@ -33,14 +33,14 @@ TEST(MultilibTest, OpEqReflexivity2) { } TEST(MultilibTest, OpEqReflexivity3) { - Multilib M1({}, {}, {}, 0, {"+foo"}); - Multilib M2({}, {}, {}, 0, {"+foo"}); + Multilib M1({}, {}, {}, {"+foo"}); + Multilib M2({}, {}, {}, {"+foo"}); ASSERT_TRUE(M1 == M2) << "Multilibs with the same flag should be the same"; } TEST(MultilibTest, OpEqInequivalence1) { - Multilib M1({}, {}, {}, 0, {"+foo"}); - Multilib M2({}, {}, {}, 0, {"-foo"}); + Multilib M1({}, {}, {}, {"+foo"}); + Multilib M2({}, {}, {}, {"-foo"}); ASSERT_FALSE(M1 == M2) << "Multilibs with conflicting flags are not the same"; ASSERT_FALSE(M2 == M1) << "Multilibs with conflicting flags are not the same (commuted)"; @@ -48,7 +48,7 @@ TEST(MultilibTest, OpEqInequivalence1) { TEST(MultilibTest, OpEqInequivalence2) { Multilib M1; - Multilib M2({}, {}, {}, 0, {"+foo"}); + Multilib M2({}, {}, {}, {"+foo"}); ASSERT_FALSE(M1 == M2) << "Flags make 
Multilibs different"; } @@ -124,7 +124,7 @@ TEST(MultilibTest, Construction2) { } TEST(MultilibTest, Construction3) { - Multilib M({}, {}, {}, 0, {"+f1", "+f2", "-f3"}); + Multilib M({}, {}, {}, {"+f1", "+f2", "-f3"}); for (Multilib::flags_list::const_iterator I = M.flags().begin(), E = M.flags().end(); I != E; ++I) { @@ -149,8 +149,8 @@ TEST(MultilibTest, SetPushback) { TEST(MultilibTest, SetPriority) { MultilibSet MS({ - Multilib("/foo", {}, {}, 1, {"+foo"}), - Multilib("/bar", {}, {}, 2, {"+bar"}), + Multilib("/foo", {}, {}, {"+foo"}), + Multilib("/bar", {}, {}, {"+bar"}), }); Multilib::flags_list Flags1 = {"+foo", "-bar"}; Multilib Selection1; @@ -166,3 +166,24 @@ TEST(MultilibTest, SetPriority) { ASSERT_TRUE(Selection2.gccSuffix() == "/bar") << "Selection picked " << Selection2 << " which was not expected"; } + +TEST(MultilibTest, SelectMultiple) { + MultilibSet MS({ + Multilib("/a", {}, {}, {"x"}), + Multilib("/b", {}, {}, {"y"}), + }); + std::vector Selection; + + Selection = MS.select({"x"}); + ASSERT_EQ(1u, Selection.size()); + EXPECT_EQ("/a", Selection[0].gccSuffix()); + + Selection = MS.select({"y"}); + ASSERT_EQ(1u, Selection.size()); + EXPECT_EQ("/b", Selection[0].gccSuffix()); + + Selection = MS.select({"y", "x"}); + ASSERT_EQ(2u, Selection.size()); + EXPECT_EQ("/a", Selection[0].gccSuffix()); + EXPECT_EQ("/b", Selection[1].gccSuffix()); +} diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 1beb6a75c5225..eb1b563b3d2c3 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8010,6 +8010,11 @@ TEST_F(FormatTest, TrailingReturnType) { "auto aaaaaaaaaaaaaaaaaaaaaa(T t)\n" " -> decltype(eaaaaaaaaaaaaaaa(t.a).aaaaaaaa());"); + FormatStyle Style = getLLVMStyleWithColumns(60); + verifyFormat("#define MAKE_DEF(NAME) \\\n" + " auto NAME() -> int { return 42; }", + Style); + // Not trailing return types. 
verifyFormat("void f() { auto a = b->c(); }"); verifyFormat("auto a = p->foo();"); @@ -11592,6 +11597,10 @@ TEST_F(FormatTest, UnderstandsRvalueReferences) { verifyFormat("template class A {\n" " static_assert(B && C, \"Something is wrong\");\n" "};"); + verifyFormat("template void swap() noexcept(Bar && Foo);"); + verifyFormat("template struct S {\n" + " explicit(Bar && Foo) S(const S &);\n" + "};"); verifyGoogleFormat("#define IF(a, b, c) if (a && (b == c))"); verifyGoogleFormat("#define WHILE(a, b, c) while (a && (b == c))"); verifyFormat("#define A(a, b) (a && b)"); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index bc8f7f36372d2..3a6fb0e9e4b3f 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -242,6 +242,17 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) { "}"); ASSERT_EQ(Tokens.size(), 12u) << Tokens; EXPECT_TOKEN(Tokens[7], tok::amp, TT_BinaryOperator); + + Tokens = + annotate("template void swap() noexcept(Bar && Foo);"); + ASSERT_EQ(Tokens.size(), 23u) << Tokens; + EXPECT_TOKEN(Tokens[15], tok::ampamp, TT_BinaryOperator); + + Tokens = annotate("template struct S {\n" + " explicit(Bar && Foo) S(const S &);\n" + "};"); + ASSERT_EQ(Tokens.size(), 30u) << Tokens; + EXPECT_TOKEN(Tokens[14], tok::ampamp, TT_BinaryOperator); } TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) { diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index e3fe5570de26a..99d672de4e882 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -42,7 +42,7 @@ if(OS_NAME MATCHES "Linux") elseif (OS_NAME MATCHES "Windows") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64}) elseif(OS_NAME MATCHES "Android") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}) else() set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64}) endif() diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp index 24bd11958e807..8c8806e8aafd3 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp @@ -77,6 +77,7 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) { size_t ExpectedStartMarker = 0; const size_t kInvalidStartMarker = -1; size_t LastSeenStartMarker = kInvalidStartMarker; + bool HaveFtMarker = true; std::vector TmpFeatures; std::set PCs; while (std::getline(IS, Line, '\n')) { @@ -93,12 +94,13 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) { LastSeenStartMarker = ExpectedStartMarker; assert(ExpectedStartMarker < Files.size()); ExpectedStartMarker++; + HaveFtMarker = false; } else if (Marker == "FT") { // FT FILE_ID COV1 COV2 COV3 ... size_t CurrentFileIdx = N; if (CurrentFileIdx != LastSeenStartMarker) return false; - LastSeenStartMarker = kInvalidStartMarker; + HaveFtMarker = true; if (ParseCoverage) { TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. 
while (ISS1 >> N) @@ -108,6 +110,8 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) { } } else if (Marker == "COV") { size_t CurrentFileIdx = N; + if (CurrentFileIdx != LastSeenStartMarker) + return false; if (ParseCoverage) while (ISS1 >> N) if (PCs.insert(N).second) @@ -116,7 +120,7 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) { return false; } } - if (LastSeenStartMarker != kInvalidStartMarker) + if (!HaveFtMarker && LastSeenStartMarker != kInvalidStartMarker) LastFailure = Files[LastSeenStartMarker].Name; FirstNotProcessedFile = ExpectedStartMarker; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index 2c769dd59aa09..c84c04a877594 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -152,6 +152,9 @@ inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { "rdhwr %0,$29\n" \ ".set pop\n" : "=r"(__v)); \ __v; }) +#elif defined (__riscv) +# define __get_tls() \ + ({ void** __v; __asm__("mv %0, tp" : "=r"(__v)); __v; }) #elif defined(__i386__) # define __get_tls() \ ({ void** __v; __asm__("movl %%gs:0, %0" : "=r"(__v)); __v; }) diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 1cb6d02f6cd6a..bca5ab82f3d59 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -64,32 +64,8 @@ template class SizeClassAllocator64 { void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS { DCHECK(isAligned(reinterpret_cast(this), alignof(ThisT))); - DCHECK_EQ(PrimaryBase, 0U); - - // Reserve the space required for the Primary. - PrimaryBase = reinterpret_cast(map( - nullptr, PrimarySize, "scudo:primary_reserve", MAP_NOACCESS, &Data)); - u32 Seed; - const u64 Time = getMonotonicTimeFast(); - if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) - Seed = static_cast(Time ^ (PrimaryBase >> 12)); const uptr PageSize = getPageSizeCached(); - for (uptr I = 0; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); - // The actual start of a region is offset by a random number of pages - // when PrimaryEnableRandomOffset is set. - Region->RegionBeg = (PrimaryBase + (I << Config::PrimaryRegionSizeLog)) + - (Config::PrimaryEnableRandomOffset - ? ((getRandomModN(&Seed, 16) + 1) * PageSize) - : 0); - Region->RandState = getRandomU32(&Seed); - Region->ReleaseInfo.LastReleaseAtNs = Time; - } - shuffle(RegionInfoArray, NumClasses, &Seed); - - setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); - const uptr GroupSize = (1U << GroupSizeLog); const uptr PagesInGroup = GroupSize / PageSize; const uptr MinSizeClass = getSizeByClassId(1); @@ -126,6 +102,37 @@ template class SizeClassAllocator64 { // use its size of in-use blocks as a heuristic. SmallerBlockReleasePageDelta = PagesInGroup * (1 + MinSizeClass / 16U) / 100; + + DCHECK_EQ(PrimaryBase, 0U); + // Reserve the space required for the Primary. + PrimaryBase = reinterpret_cast(map( + nullptr, PrimarySize, "scudo:primary_reserve", MAP_NOACCESS, &Data)); + + u32 Seed; + const u64 Time = getMonotonicTimeFast(); + if (!getRandom(reinterpret_cast(&Seed), sizeof(Seed))) + Seed = static_cast(Time ^ (PrimaryBase >> 12)); + + for (uptr I = 0; I < NumClasses; I++) { + RegionInfo *Region = getRegionInfo(I); + // The actual start of a region is offset by a random number of pages + // when PrimaryEnableRandomOffset is set. 
+ Region->RegionBeg = (PrimaryBase + (I << Config::PrimaryRegionSizeLog)) + + (Config::PrimaryEnableRandomOffset + ? ((getRandomModN(&Seed, 16) + 1) * PageSize) + : 0); + Region->RandState = getRandomU32(&Seed); + // Releasing small blocks is expensive, set a higher threshold to avoid + // frequent page releases. + if (isSmallBlock(getSizeByClassId(I))) + Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta; + else + Region->TryReleaseThreshold = PageSize; + Region->ReleaseInfo.LastReleaseAtNs = Time; + } + shuffle(RegionInfoArray, NumClasses, &Seed); + + setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { @@ -440,6 +447,8 @@ template class SizeClassAllocator64 { uptr MappedUser GUARDED_BY(Mutex) = 0; // Bytes allocated for user memory. uptr AllocatedUser GUARDED_BY(Mutex) = 0; + // The minimum size of pushed blocks to trigger page release. + uptr TryReleaseThreshold GUARDED_BY(Mutex) = 0; MapPlatformData Data GUARDED_BY(Mutex) = {}; ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex) = {}; bool Exhausted GUARDED_BY(Mutex) = false; @@ -486,6 +495,11 @@ template class SizeClassAllocator64 { return Base + (CompactPtrGroupBase << CompactPtrScale); } + ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) { + const uptr PageSize = getPageSizeCached(); + return BlockSize < PageSize / 16U; + } + // Push the blocks to their batch group. The layout will be like, // // FreeList - > BG -> BG -> BG @@ -823,14 +837,15 @@ template class SizeClassAllocator64 { return 0; // Nothing new to release. const bool CheckDensity = - BlockSize < PageSize / 16U && ReleaseType != ReleaseToOS::ForceAll; + isSmallBlock(BlockSize) && ReleaseType != ReleaseToOS::ForceAll; // Releasing smaller blocks is expensive, so we want to make sure that a // significant amount of bytes are free, and that there has been a good // amount of batches pushed to the freelist before attempting to release. if (CheckDensity) { if (ReleaseType == ReleaseToOS::Normal && - BytesPushed < Region->AllocatedUser / 16U) + BytesPushed < Region->TryReleaseThreshold) { return 0; + } } if (ReleaseType == ReleaseToOS::Normal) { @@ -865,11 +880,18 @@ template class SizeClassAllocator64 { // of groups. uptr NumberOfBatchGroups = Region->FreeList.size(); + // We are examining each group and will take the minimum distance to the + // release threshold as the next Region::TryReleaseThreshold(). Note that if + // the size of free blocks has reached the release threshold, the distance + // to the next release will be PageSize * SmallerBlockReleasePageDelta. See + // the comment on `SmallerBlockReleasePageDelta` for more details. + uptr MinDistToThreshold = GroupSize; + for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr; BG != nullptr;) { const uptr PushedBytesDelta = - BG->PushedBlocks - BG->PushedBlocksAtLastCheckpoint; - if (PushedBytesDelta * BlockSize < PageSize) { + (BG->PushedBlocks - BG->PushedBlocksAtLastCheckpoint) * BlockSize; + if (PushedBytesDelta < PageSize) { Prev = BG; BG = BG->Next; continue; @@ -913,16 +935,38 @@ template class SizeClassAllocator64 { // that this heuristic only applies when all the spaces in a BatchGroup // are allocated. 
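The density check that resumes just below only releases pages for small blocks once a large fraction of a group's allocated bytes are back on the freelist. A self-contained sketch of the arithmetic with made-up numbers (the constants mirror the expressions in this hunk; none of this is scudo code):

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative numbers only: a 64-byte size class in a fully allocated
  // 64 KiB batch group.
  const std::uint64_t BlockSize = 64;
  const std::uint64_t AllocatedGroupSize = 64 * 1024;
  const std::uint64_t BytesInBG = 63 * 1024; // free bytes sitting in the group
  // Mirrors: ReleaseThreshold = AllocatedGroupSize * (100 - 1 - BlockSize/16) / 100
  const std::uint64_t ReleaseThreshold =
      (AllocatedGroupSize * (100 - 1 - BlockSize / 16)) / 100;
  const bool HighDensity = BytesInBG >= ReleaseThreshold;
  std::printf("threshold=%llu bytes, free=%llu bytes, high density=%s\n",
              static_cast<unsigned long long>(ReleaseThreshold),
              static_cast<unsigned long long>(BytesInBG),
              HighDensity ? "yes" : "no");
  return 0;
}
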
if (CheckDensity) { - const bool HighDensity = (BytesInBG * 100U) / AllocatedGroupSize >= - (100U - 1U - BlockSize / 16U); + const uptr ReleaseThreshold = + (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U; + const bool HighDensity = BytesInBG >= ReleaseThreshold; const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize); // If all blocks in the group are released, we will do range marking // which is fast. Otherwise, we will wait until we have accumulated // a certain amount of free memory. const bool ReachReleaseDelta = - MayHaveReleasedAll ? true - : PushedBytesDelta * BlockSize >= - PageSize * SmallerBlockReleasePageDelta; + MayHaveReleasedAll + ? true + : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta; + + if (!HighDensity) { + DCHECK_LE(BytesInBG, ReleaseThreshold); + // The following is the usage of a memroy group, + // + // BytesInBG ReleaseThreshold + // / \ v + // +---+---------------------------+-----+ + // | | | | | + // +---+---------------------------+-----+ + // \ / ^ + // PushedBytesDelta GroupEnd + MinDistToThreshold = + Min(MinDistToThreshold, + ReleaseThreshold - BytesInBG + PushedBytesDelta); + } else { + // If it reaches high density at this round, the next time we will try + // to release is based on SmallerBlockReleasePageDelta + MinDistToThreshold = + Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta); + } if (!HighDensity || !ReachReleaseDelta) { Prev = BG; @@ -976,6 +1020,16 @@ template class SizeClassAllocator64 { GroupToRelease.push_back(Cur); } + // Only small blocks have the adaptive `TryReleaseThreshold`. + if (isSmallBlock(BlockSize)) { + // If the MinDistToThreshold is not updated, that means each memory group + // may have only pushed less than a page size. In that case, just set it + // back to normal. + if (MinDistToThreshold == GroupSize) + MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta; + Region->TryReleaseThreshold = MinDistToThreshold; + } + if (GroupToRelease.empty()) return 0; diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 97aa4b77311f1..6ac6ac6a7fb4c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -2497,11 +2497,21 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, res; \ }) +// Ignore interceptors in OnLibraryLoaded()/Unloaded(). These hooks use code +// (ListOfModules::init, MemoryMappingLayout::DumpListOfModules) that make +// intercepted calls, which can cause deadlockes with ReportRace() which also +// uses this code. 
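The comment above explains why the library-load hooks must run with interceptors ignored: module enumeration calls back into libc, and intercepting those calls can deadlock against ReportRace, which uses the same code. The guarded macro definitions follow right after this sketch; the sketch itself only illustrates the RAII shape of such a guard with a hypothetical thread-local counter, not tsan's real implementation:

#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for the runtime's per-thread state.
thread_local std::size_t ignore_interceptors_depth = 0;

struct ScopedIgnoreGuard {
  ScopedIgnoreGuard() { ++ignore_interceptors_depth; }
  ~ScopedIgnoreGuard() { --ignore_interceptors_depth; }
};

bool interceptors_active() { return ignore_interceptors_depth == 0; }

void OnLibraryLoadedHook(const char *filename) {
  // While the guard is alive, intercepted functions fall through to the real
  // implementation instead of re-entering the runtime.
  ScopedIgnoreGuard ignore;
  std::printf("scanning modules after loading %s (interceptors active: %d)\n",
              filename, interceptors_active());
}

int main() {
  OnLibraryLoadedHook("libfoo.so");
  return 0;
}
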
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \ - libignore()->OnLibraryLoaded(filename) + ({ \ + ScopedIgnoreInterceptors ignore_interceptors; \ + libignore()->OnLibraryLoaded(filename); \ + }) -#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \ - libignore()->OnLibraryUnloaded() +#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \ + ({ \ + ScopedIgnoreInterceptors ignore_interceptors; \ + libignore()->OnLibraryUnloaded(); \ + }) #define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) \ Acquire(((TsanInterceptorContext *) ctx)->thr, pc, u) diff --git a/compiler-rt/test/builtins/Unit/divmodti4_test.c b/compiler-rt/test/builtins/Unit/divmodti4_test.c index 26b3c1609f18c..7b8d3faae2cdf 100644 --- a/compiler-rt/test/builtins/Unit/divmodti4_test.c +++ b/compiler-rt/test/builtins/Unit/divmodti4_test.c @@ -55,22 +55,22 @@ char assumption_1[sizeof(ti_int) == 2*sizeof(di_int)] = {0}; tu_int tests[][4] = { -{ (ti_int) 0, (ti_int) 1, (ti_int) 0, (ti_int) 0 }, -{ (ti_int) 0, (ti_int)-1, (ti_int) 0, (ti_int) 0 }, -{ (ti_int) 2, (ti_int) 1, (ti_int) 2, (ti_int) 0 }, -{ (ti_int) 2, (ti_int)-1, (ti_int)-2, (ti_int) 0 }, -{ (ti_int)-2, (ti_int) 1, (ti_int)-2, (ti_int) 0 }, -{ (ti_int)-2, (ti_int)-1, (ti_int) 2, (ti_int) 0 }, -{ (ti_int) 5, (ti_int) 3, (ti_int) 1, (ti_int) 2 }, -{ (ti_int) 5, (ti_int)-3, (ti_int)-1, (ti_int) 2 }, -{ (ti_int)-5, (ti_int) 3, (ti_int)-1, (ti_int)-2 }, -{ (ti_int)-5, (ti_int)-3, (ti_int) 1, (ti_int)-2 }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-2, (ti_int)0x4000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 2, (ti_int)0xC000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-3, (ti_int)0x2AAAAAAAAAAAAAAALL << 64 | 0xAAAAAAAAAAAAAAAALL, (ti_int)-2 }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 3, (ti_int)0xD555555555555555LL << 64 | 0x5555555555555556LL, (ti_int)-2 }, +{ (ti_int) 0, (ti_int) 1, (ti_int) 0, (ti_int) 0 }, +{ (ti_int) 0, (ti_int)-1, (ti_int) 0, (ti_int) 0 }, +{ (ti_int) 2, (ti_int) 1, (ti_int) 2, (ti_int) 0 }, +{ (ti_int) 2, (ti_int)-1, (ti_int)-2, (ti_int) 0 }, +{ (ti_int)-2, (ti_int) 1, (ti_int)-2, (ti_int) 0 }, +{ (ti_int)-2, (ti_int)-1, (ti_int) 2, (ti_int) 0 }, +{ (ti_int) 5, (ti_int) 3, (ti_int) 1, (ti_int) 2 }, +{ (ti_int) 5, (ti_int)-3, (ti_int)-1, (ti_int) 2 }, +{ (ti_int)-5, (ti_int) 3, (ti_int)-1, (ti_int)-2 }, +{ (ti_int)-5, (ti_int)-3, (ti_int) 1, (ti_int)-2 }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int) 1, (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)-1, (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)-2, (ti_int)0x4000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int) 2, (ti_int)0xC000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)-3, (ti_int)0x2AAAAAAAAAAAAAAAULL << 64 | 0xAAAAAAAAAAAAAAAAULL, (ti_int)-2 }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int) 3, (ti_int)0xD555555555555555ULL << 64 | 0x5555555555555556ULL, (ti_int)-2 }, }; #endif diff --git a/compiler-rt/test/fuzzer/merge-control-file.test b/compiler-rt/test/fuzzer/merge-control-file.test index ebd2cf5af3baa..c7d666ea471e9 100644 --- 
a/compiler-rt/test/fuzzer/merge-control-file.test +++ b/compiler-rt/test/fuzzer/merge-control-file.test @@ -50,3 +50,17 @@ RUN: echo STARTED 2 2 >> %t/MCF RUN: echo FT 2 13 >> %t/MCF RUN: %run %t/T.exe -merge=1 %t/T1 %t/T2 -merge_control_file=%t/MCF 2>&1 | FileCheck %s --check-prefix=OK_3 OK_3: MERGE-OUTER: nothing to do, merge has been completed before + +# Test for invalid COV file_id +RUN: rm -f %t/T1/*; cp %t/T0/* %t/T1 +RUN: echo 3 > %t/MCF; echo 0 >> %t/MCF; echo %t/T1/1 >> %t/MCF; echo %t/T1/2 >> %t/MCF; echo %t/T1/3 >> %t/MCF +RUN: echo STARTED 0 1 >> %t/MCF +RUN: echo FT 0 11 >> %t/MCF +RUN: echo STARTED 1 2 >> %t/MCF +RUN: echo FT 1 12 >> %t/MCF +RUN: echo STARTED 2 2 >> %t/MCF +RUN: echo FT 2 13 >> %t/MCF +# Invalid file-id 21 here +RUN: echo COV 21 13 >> %t/MCF +RUN: %run %t/T.exe -merge=1 %t/T1 %t/T2 -merge_control_file=%t/MCF 2>&1 | FileCheck %s --check-prefix=COV_INVALID +COV_INVALID: MERGE-OUTER: bad control file, will overwrite it diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp index 60f32c2d83dbf..d0400afe743b3 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp @@ -49,6 +49,10 @@ int main(void) { // CHECK-asan: ---p {{.*}} [shadow gap] // CHECK-asan: rw-p {{.*}} [high shadow] +// CHECK-hwasan: rw-p {{.*}} [low shadow] +// CHECK-hwasan: ---p {{.*}} [shadow gap] +// CHECK-hwasan: rw-p {{.*}} [high shadow] + // CHECK-msan: ---p {{.*}} [invalid] // CHECK-msan: rw-p {{.*}} [shadow{{.*}}] // CHECK-msan: ---p {{.*}} [origin{{.*}}] diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp index 82c0faf0e2add..7633eb4762292 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx %s -o %t && %run %t 2>&1 | FileCheck %s -// UNSUPPORTED: android, ubsan +// UNSUPPORTED: android, hwasan, ubsan #include #include diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp index 641495508ba10..fdce916ad1e1a 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp @@ -1,4 +1,5 @@ // RUN: %clang -w -O0 %s -o %t && %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: hwasan // UNSUPPORTED: lsan // UNSUPPORTED: msan // UNSUPPORTED: ubsan diff --git a/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp b/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp index 64475a3e97373..8f8b2d514ea0b 100644 --- a/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp +++ b/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp @@ -30,9 +30,6 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Start.\n"); // CHECK: Start. 
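The new RUN lines above assemble a control file whose final COV record carries a file id (21) that does not match the id of the last STARTED record, which the hardened Merger::Parse now rejects. For reference, the contents those lines produce, written out as a plain string (paths shortened; the annotations only restate what the RUN lines echo):

// Illustrative contents of the control file assembled by the RUN lines above.
const char *SampleMergeControlFile =
    "3\n"           // first header line: 3 input files follow
    "0\n"           // second header line (0 in this test)
    "T1/1\n"        // the three input paths
    "T1/2\n"
    "T1/3\n"
    "STARTED 0 1\n" // file 0 started, size 1
    "FT 0 11\n"     // features recorded for file 0
    "STARTED 1 2\n"
    "FT 1 12\n"
    "STARTED 2 2\n"
    "FT 2 13\n"
    "COV 21 13\n";  // rejected: 21 is not the id of the last STARTED file
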
- fprintf(stderr, "ExternalWrite function address: %p\n", &ExternalWrite); - // CHECK: ExternalWrite function address: [[ExternalWrite_addr:0x[0-9a-z]+]] - void *opaque_object = malloc(16); std::thread t1([opaque_object] { ExternalWrite(opaque_object); @@ -85,7 +82,7 @@ __tsan_on_report(void *report) { info.dli_saddr, info.dli_sname); } // Ensure ExternalWrite() function is top of trace - // CHECK: 0: frame: 0x{{[0-9a-z]+}}, function: [[ExternalWrite_addr]] _Z13ExternalWritePv + // CHECK: 0: frame: 0x{{[0-9a-z]+}}, function: 0x{{[0-9a-z]+}} _Z13ExternalWritePv } // CHECK: Done. diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt index 8cc66ddbbbb0e..23fea3920efb6 100644 --- a/flang/examples/CMakeLists.txt +++ b/flang/examples/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(ExternalHelloWorld) add_subdirectory(PrintFlangFunctionNames) add_subdirectory(FlangOmpReport) -add_subdirectory(FeatureList) diff --git a/flang/examples/FeatureList/CMakeLists.txt b/flang/examples/FeatureList/CMakeLists.txt deleted file mode 100644 index e17a7bebbff05..0000000000000 --- a/flang/examples/FeatureList/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_llvm_example_library(flangFeatureList - MODULE - FeatureList.cpp - - DEPENDS - acc_gen - flangFrontend - omp_gen -) diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp deleted file mode 100644 index 7d7e63e148bc0..0000000000000 --- a/flang/examples/FeatureList/FeatureList.cpp +++ /dev/null @@ -1,761 +0,0 @@ -//===-- FeatureList.cpp ---------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A plugin that counts the amount of times a particular parse tree node -// occurs. 
This plugin should cover each feature covered in dump-parse-tree.h -// -//===----------------------------------------------------------------------===// - -#include "flang/Frontend/FrontendActions.h" -#include "flang/Frontend/FrontendPluginRegistry.h" -#include "flang/Parser/parse-tree-visitor.h" -#include "flang/Parser/parse-tree.h" -#include "flang/Parser/parsing.h" - -#include - -using namespace Fortran::frontend; -using namespace Fortran::parser; -using namespace Fortran; - -#define READ_FEATURE_CUST(classname, n) \ - bool Pre(const classname &) { \ - record(#n); \ - return true; \ - } \ - void Post(const classname &) {} - -#define READ_FEATURE(classname) READ_FEATURE_CUST(classname, classname) - -struct NodeVisitor { -private: - std::map frequencies; - - void record(const char *name) { - const auto [it, ins] = frequencies.insert({name, 1}); - if (!ins) { - frequencies[name] = it->second + 1; - } - } - -public: - const std::map &getFrequencies() const { - return frequencies; - } - - READ_FEATURE_CUST(format::ControlEditDesc, ControlEditDesc) - READ_FEATURE_CUST(format::DerivedTypeDataEditDesc, DerivedTypeDataEditDesc) - READ_FEATURE_CUST(format::FormatItem, FormatItem) - READ_FEATURE_CUST(format::FormatSpecification, FormatSpecification) - READ_FEATURE_CUST( - format::IntrinsicTypeDataEditDesc, IntrinsicTypeDataEditDesc) - READ_FEATURE(Abstract) - READ_FEATURE(AccAtomicCapture) - READ_FEATURE(AccAtomicCapture::Stmt1) - READ_FEATURE(AccAtomicCapture::Stmt2) - READ_FEATURE(AccAtomicRead) - READ_FEATURE(AccAtomicUpdate) - READ_FEATURE(AccAtomicWrite) - READ_FEATURE(AccBeginBlockDirective) - READ_FEATURE(AccBeginCombinedDirective) - READ_FEATURE(AccBeginLoopDirective) - READ_FEATURE(AccBlockDirective) - READ_FEATURE(AccClause) - READ_FEATURE(AccBindClause) - READ_FEATURE(AccDefaultClause) - READ_FEATURE(AccClauseList) - READ_FEATURE(AccCombinedDirective) - READ_FEATURE(AccDataModifier) - READ_FEATURE(AccDataModifier::Modifier) - READ_FEATURE(AccDeclarativeDirective) - READ_FEATURE(AccEndAtomic) - READ_FEATURE(AccEndBlockDirective) - READ_FEATURE(AccEndCombinedDirective) - READ_FEATURE(AccGangArgument) - READ_FEATURE(AccObject) - READ_FEATURE(AccObjectList) - READ_FEATURE(AccObjectListWithModifier) - READ_FEATURE(AccObjectListWithReduction) - READ_FEATURE(AccReductionOperator) - READ_FEATURE(AccReductionOperator::Operator) - READ_FEATURE(AccSizeExpr) - READ_FEATURE(AccSizeExprList) - READ_FEATURE(AccSelfClause) - READ_FEATURE(AccStandaloneDirective) - READ_FEATURE(AccDeviceTypeExpr) - READ_FEATURE(AccDeviceTypeExprList) - READ_FEATURE(AccTileExpr) - READ_FEATURE(AccTileExprList) - READ_FEATURE(AccLoopDirective) - READ_FEATURE(AccWaitArgument) - READ_FEATURE(AcImpliedDo) - READ_FEATURE(AcImpliedDoControl) - READ_FEATURE(AcValue) - READ_FEATURE(AccessStmt) - READ_FEATURE(AccessId) - READ_FEATURE(AccessSpec) - READ_FEATURE(AccessSpec::Kind) - READ_FEATURE(AcSpec) - READ_FEATURE(ActionStmt) - READ_FEATURE(ActualArg) - READ_FEATURE(ActualArg::PercentRef) - READ_FEATURE(ActualArg::PercentVal) - READ_FEATURE(ActualArgSpec) - READ_FEATURE(AcValue::Triplet) - READ_FEATURE(AllocOpt) - READ_FEATURE(AllocOpt::Mold) - READ_FEATURE(AllocOpt::Source) - READ_FEATURE(Allocatable) - READ_FEATURE(AllocatableStmt) - READ_FEATURE(AllocateCoarraySpec) - READ_FEATURE(AllocateObject) - READ_FEATURE(AllocateShapeSpec) - READ_FEATURE(AllocateStmt) - READ_FEATURE(Allocation) - READ_FEATURE(AltReturnSpec) - READ_FEATURE(ArithmeticIfStmt) - READ_FEATURE(ArrayConstructor) - READ_FEATURE(ArrayElement) - 
READ_FEATURE(ArraySpec) - READ_FEATURE(AssignStmt) - READ_FEATURE(AssignedGotoStmt) - READ_FEATURE(AssignmentStmt) - READ_FEATURE(AssociateConstruct) - READ_FEATURE(AssociateStmt) - READ_FEATURE(Association) - READ_FEATURE(AssumedImpliedSpec) - READ_FEATURE(AssumedRankSpec) - READ_FEATURE(AssumedShapeSpec) - READ_FEATURE(AssumedSizeSpec) - READ_FEATURE(Asynchronous) - READ_FEATURE(AsynchronousStmt) - READ_FEATURE(AttrSpec) - READ_FEATURE(BOZLiteralConstant) - READ_FEATURE(BackspaceStmt) - READ_FEATURE(BasedPointer) - READ_FEATURE(BasedPointerStmt) - READ_FEATURE(BindAttr) - READ_FEATURE(BindAttr::Deferred) - READ_FEATURE(BindAttr::Non_Overridable) - READ_FEATURE(BindEntity) - READ_FEATURE(BindEntity::Kind) - READ_FEATURE(BindStmt) - READ_FEATURE(Block) - READ_FEATURE(BlockConstruct) - READ_FEATURE(BlockData) - READ_FEATURE(BlockDataStmt) - READ_FEATURE(BlockSpecificationPart) - READ_FEATURE(BlockStmt) - READ_FEATURE(BoundsRemapping) - READ_FEATURE(BoundsSpec) - READ_FEATURE(Call) - READ_FEATURE(CallStmt) - READ_FEATURE(CaseConstruct) - READ_FEATURE(CaseConstruct::Case) - READ_FEATURE(CaseSelector) - READ_FEATURE(CaseStmt) - READ_FEATURE(CaseValueRange) - READ_FEATURE(CaseValueRange::Range) - READ_FEATURE(ChangeTeamConstruct) - READ_FEATURE(ChangeTeamStmt) - READ_FEATURE(CharLength) - READ_FEATURE(CharLiteralConstant) - READ_FEATURE(CharLiteralConstantSubstring) - READ_FEATURE(CharSelector) - READ_FEATURE(CharSelector::LengthAndKind) - READ_FEATURE(CloseStmt) - READ_FEATURE(CloseStmt::CloseSpec) - READ_FEATURE(CoarrayAssociation) - READ_FEATURE(CoarraySpec) - READ_FEATURE(CodimensionDecl) - READ_FEATURE(CodimensionStmt) - READ_FEATURE(CoindexedNamedObject) - READ_FEATURE(CommonBlockObject) - READ_FEATURE(CommonStmt) - READ_FEATURE(CommonStmt::Block) - READ_FEATURE(CompilerDirective) - READ_FEATURE(CompilerDirective::IgnoreTKR) - READ_FEATURE(CompilerDirective::LoopCount) - READ_FEATURE(CompilerDirective::NameValue) - READ_FEATURE(ComplexLiteralConstant) - READ_FEATURE(ComplexPart) - READ_FEATURE(ComponentArraySpec) - READ_FEATURE(ComponentAttrSpec) - READ_FEATURE(ComponentDataSource) - READ_FEATURE(ComponentDecl) - READ_FEATURE(FillDecl) - READ_FEATURE(ComponentOrFill) - READ_FEATURE(ComponentDefStmt) - READ_FEATURE(ComponentSpec) - READ_FEATURE(ComputedGotoStmt) - READ_FEATURE(ConcurrentControl) - READ_FEATURE(ConcurrentHeader) - READ_FEATURE(ConnectSpec) - READ_FEATURE(ConnectSpec::CharExpr) - READ_FEATURE(ConnectSpec::CharExpr::Kind) - READ_FEATURE(ConnectSpec::Newunit) - READ_FEATURE(ConnectSpec::Recl) - READ_FEATURE(ContainsStmt) - READ_FEATURE(Contiguous) - READ_FEATURE(ContiguousStmt) - READ_FEATURE(ContinueStmt) - READ_FEATURE(CriticalConstruct) - READ_FEATURE(CriticalStmt) - READ_FEATURE(CycleStmt) - READ_FEATURE(DataComponentDefStmt) - READ_FEATURE(DataIDoObject) - READ_FEATURE(DataImpliedDo) - READ_FEATURE(DataRef) - READ_FEATURE(DataStmt) - READ_FEATURE(DataStmtConstant) - READ_FEATURE(DataStmtObject) - READ_FEATURE(DataStmtRepeat) - READ_FEATURE(DataStmtSet) - READ_FEATURE(DataStmtValue) - READ_FEATURE(DeallocateStmt) - READ_FEATURE(DeclarationConstruct) - READ_FEATURE(DeclarationTypeSpec) - READ_FEATURE(DeclarationTypeSpec::Class) - READ_FEATURE(DeclarationTypeSpec::ClassStar) - READ_FEATURE(DeclarationTypeSpec::Record) - READ_FEATURE(DeclarationTypeSpec::Type) - READ_FEATURE(DeclarationTypeSpec::TypeStar) - READ_FEATURE(Default) - READ_FEATURE(DeferredCoshapeSpecList) - READ_FEATURE(DeferredShapeSpecList) - READ_FEATURE(DefinedOpName) - READ_FEATURE(DefinedOperator) - 
READ_FEATURE(DefinedOperator::IntrinsicOperator) - READ_FEATURE(DerivedTypeDef) - READ_FEATURE(DerivedTypeSpec) - READ_FEATURE(DerivedTypeStmt) - READ_FEATURE(Designator) - READ_FEATURE(DimensionStmt) - READ_FEATURE(DimensionStmt::Declaration) - READ_FEATURE(DoConstruct) - READ_FEATURE(DummyArg) - READ_FEATURE(ElseIfStmt) - READ_FEATURE(ElseStmt) - READ_FEATURE(ElsewhereStmt) - READ_FEATURE(EndAssociateStmt) - READ_FEATURE(EndBlockDataStmt) - READ_FEATURE(EndBlockStmt) - READ_FEATURE(EndChangeTeamStmt) - READ_FEATURE(EndCriticalStmt) - READ_FEATURE(EndDoStmt) - READ_FEATURE(EndEnumStmt) - READ_FEATURE(EndForallStmt) - READ_FEATURE(EndFunctionStmt) - READ_FEATURE(EndIfStmt) - READ_FEATURE(EndInterfaceStmt) - READ_FEATURE(EndLabel) - READ_FEATURE(EndModuleStmt) - READ_FEATURE(EndMpSubprogramStmt) - READ_FEATURE(EndProgramStmt) - READ_FEATURE(EndSelectStmt) - READ_FEATURE(EndSubmoduleStmt) - READ_FEATURE(EndSubroutineStmt) - READ_FEATURE(EndTypeStmt) - READ_FEATURE(EndWhereStmt) - READ_FEATURE(EndfileStmt) - READ_FEATURE(EntityDecl) - READ_FEATURE(EntryStmt) - READ_FEATURE(EnumDef) - READ_FEATURE(EnumDefStmt) - READ_FEATURE(Enumerator) - READ_FEATURE(EnumeratorDefStmt) - READ_FEATURE(EorLabel) - READ_FEATURE(EquivalenceObject) - READ_FEATURE(EquivalenceStmt) - READ_FEATURE(ErrLabel) - READ_FEATURE(ErrorRecovery) - READ_FEATURE(EventPostStmt) - READ_FEATURE(EventWaitStmt) - READ_FEATURE(EventWaitStmt::EventWaitSpec) - READ_FEATURE(ExecutableConstruct) - READ_FEATURE(ExecutionPart) - READ_FEATURE(ExecutionPartConstruct) - READ_FEATURE(ExitStmt) - READ_FEATURE(ExplicitCoshapeSpec) - READ_FEATURE(ExplicitShapeSpec) - READ_FEATURE(Expr) - READ_FEATURE(Expr::Parentheses) - READ_FEATURE(Expr::UnaryPlus) - READ_FEATURE(Expr::Negate) - READ_FEATURE(Expr::NOT) - READ_FEATURE(Expr::PercentLoc) - READ_FEATURE(Expr::DefinedUnary) - READ_FEATURE(Expr::Power) - READ_FEATURE(Expr::Multiply) - READ_FEATURE(Expr::Divide) - READ_FEATURE(Expr::Add) - READ_FEATURE(Expr::Subtract) - READ_FEATURE(Expr::Concat) - READ_FEATURE(Expr::LT) - READ_FEATURE(Expr::LE) - READ_FEATURE(Expr::EQ) - READ_FEATURE(Expr::NE) - READ_FEATURE(Expr::GE) - READ_FEATURE(Expr::GT) - READ_FEATURE(Expr::AND) - READ_FEATURE(Expr::OR) - READ_FEATURE(Expr::EQV) - READ_FEATURE(Expr::NEQV) - READ_FEATURE(Expr::DefinedBinary) - READ_FEATURE(Expr::ComplexConstructor) - READ_FEATURE(External) - READ_FEATURE(ExternalStmt) - READ_FEATURE(FailImageStmt) - READ_FEATURE(FileUnitNumber) - READ_FEATURE(FinalProcedureStmt) - READ_FEATURE(FlushStmt) - READ_FEATURE(ForallAssignmentStmt) - READ_FEATURE(ForallBodyConstruct) - READ_FEATURE(ForallConstruct) - READ_FEATURE(ForallConstructStmt) - READ_FEATURE(ForallStmt) - READ_FEATURE(FormTeamStmt) - READ_FEATURE(FormTeamStmt::FormTeamSpec) - READ_FEATURE(Format) - READ_FEATURE(FormatStmt) - READ_FEATURE(FunctionReference) - READ_FEATURE(FunctionStmt) - READ_FEATURE(FunctionSubprogram) - READ_FEATURE(GenericSpec) - READ_FEATURE(GenericSpec::Assignment) - READ_FEATURE(GenericSpec::ReadFormatted) - READ_FEATURE(GenericSpec::ReadUnformatted) - READ_FEATURE(GenericSpec::WriteFormatted) - READ_FEATURE(GenericSpec::WriteUnformatted) - READ_FEATURE(GenericStmt) - READ_FEATURE(GotoStmt) - READ_FEATURE(HollerithLiteralConstant) - READ_FEATURE(IdExpr) - READ_FEATURE(IdVariable) - READ_FEATURE(IfConstruct) - READ_FEATURE(IfConstruct::ElseBlock) - READ_FEATURE(IfConstruct::ElseIfBlock) - READ_FEATURE(IfStmt) - READ_FEATURE(IfThenStmt) - READ_FEATURE(TeamValue) - READ_FEATURE(ImageSelector) - READ_FEATURE(ImageSelectorSpec) - 
READ_FEATURE(ImageSelectorSpec::Stat) - READ_FEATURE(ImageSelectorSpec::Team_Number) - READ_FEATURE(ImplicitPart) - READ_FEATURE(ImplicitPartStmt) - READ_FEATURE(ImplicitSpec) - READ_FEATURE(ImplicitStmt) - READ_FEATURE(ImplicitStmt::ImplicitNoneNameSpec) - READ_FEATURE(ImpliedShapeSpec) - READ_FEATURE(ImportStmt) - READ_FEATURE(Initialization) - READ_FEATURE(InputImpliedDo) - READ_FEATURE(InputItem) - READ_FEATURE(InquireSpec) - READ_FEATURE(InquireSpec::CharVar) - READ_FEATURE(InquireSpec::CharVar::Kind) - READ_FEATURE(InquireSpec::IntVar) - READ_FEATURE(InquireSpec::IntVar::Kind) - READ_FEATURE(InquireSpec::LogVar) - READ_FEATURE(InquireSpec::LogVar::Kind) - READ_FEATURE(InquireStmt) - READ_FEATURE(InquireStmt::Iolength) - READ_FEATURE(IntegerTypeSpec) - READ_FEATURE(IntentSpec) - READ_FEATURE(IntentSpec::Intent) - READ_FEATURE(IntentStmt) - READ_FEATURE(InterfaceBlock) - READ_FEATURE(InterfaceBody) - READ_FEATURE(InterfaceBody::Function) - READ_FEATURE(InterfaceBody::Subroutine) - READ_FEATURE(InterfaceSpecification) - READ_FEATURE(InterfaceStmt) - READ_FEATURE(InternalSubprogram) - READ_FEATURE(InternalSubprogramPart) - READ_FEATURE(Intrinsic) - READ_FEATURE(IntrinsicStmt) - READ_FEATURE(IntrinsicTypeSpec) - READ_FEATURE(IntrinsicTypeSpec::Character) - READ_FEATURE(IntrinsicTypeSpec::Complex) - READ_FEATURE(IntrinsicTypeSpec::DoubleComplex) - READ_FEATURE(IntrinsicTypeSpec::DoublePrecision) - READ_FEATURE(IntrinsicTypeSpec::Logical) - READ_FEATURE(IntrinsicTypeSpec::Real) - READ_FEATURE(IoControlSpec) - READ_FEATURE(IoControlSpec::Asynchronous) - READ_FEATURE(IoControlSpec::CharExpr) - READ_FEATURE(IoControlSpec::CharExpr::Kind) - READ_FEATURE(IoControlSpec::Pos) - READ_FEATURE(IoControlSpec::Rec) - READ_FEATURE(IoControlSpec::Size) - READ_FEATURE(IoUnit) - READ_FEATURE(Keyword) - READ_FEATURE(KindParam) - READ_FEATURE(KindSelector) - READ_FEATURE(KindSelector::StarSize) - READ_FEATURE(LabelDoStmt) - READ_FEATURE(LanguageBindingSpec) - READ_FEATURE(LengthSelector) - READ_FEATURE(LetterSpec) - READ_FEATURE(LiteralConstant) - READ_FEATURE(IntLiteralConstant) - READ_FEATURE(LocalitySpec) - READ_FEATURE(LocalitySpec::DefaultNone) - READ_FEATURE(LocalitySpec::Local) - READ_FEATURE(LocalitySpec::LocalInit) - READ_FEATURE(LocalitySpec::Shared) - READ_FEATURE(LockStmt) - READ_FEATURE(LockStmt::LockStat) - READ_FEATURE(LogicalLiteralConstant) - READ_FEATURE(LoopControl) - READ_FEATURE(LoopControl::Concurrent) - READ_FEATURE(MainProgram) - READ_FEATURE(Map) - READ_FEATURE(Map::EndMapStmt) - READ_FEATURE(Map::MapStmt) - READ_FEATURE(MaskedElsewhereStmt) - READ_FEATURE(Module) - READ_FEATURE(ModuleStmt) - READ_FEATURE(ModuleSubprogram) - READ_FEATURE(ModuleSubprogramPart) - READ_FEATURE(MpSubprogramStmt) - READ_FEATURE(MsgVariable) - READ_FEATURE(Name) - READ_FEATURE(NamedConstant) - READ_FEATURE(NamedConstantDef) - READ_FEATURE(NamelistStmt) - READ_FEATURE(NamelistStmt::Group) - READ_FEATURE(NonLabelDoStmt) - READ_FEATURE(NoPass) - READ_FEATURE(NullifyStmt) - READ_FEATURE(NullInit) - READ_FEATURE(ObjectDecl) - READ_FEATURE(OldParameterStmt) - READ_FEATURE(OmpAlignedClause) - READ_FEATURE(OmpAtomic) - READ_FEATURE(OmpAtomicCapture) - READ_FEATURE(OmpAtomicCapture::Stmt1) - READ_FEATURE(OmpAtomicCapture::Stmt2) - READ_FEATURE(OmpAtomicRead) - READ_FEATURE(OmpAtomicUpdate) - READ_FEATURE(OmpAtomicWrite) - READ_FEATURE(OmpBeginBlockDirective) - READ_FEATURE(OmpBeginLoopDirective) - READ_FEATURE(OmpBeginSectionsDirective) - READ_FEATURE(OmpBlockDirective) - READ_FEATURE(OmpCancelType) - 
READ_FEATURE(OmpCancelType::Type) - READ_FEATURE(OmpClause) - READ_FEATURE(OmpClauseList) - READ_FEATURE(OmpCriticalDirective) - READ_FEATURE(OmpDeclareTargetSpecifier) - READ_FEATURE(OmpDeclareTargetWithClause) - READ_FEATURE(OmpDeclareTargetWithList) - READ_FEATURE(OmpDefaultClause) - READ_FEATURE(OmpDefaultClause::Type) - READ_FEATURE(OmpDefaultmapClause) - READ_FEATURE(OmpDefaultmapClause::ImplicitBehavior) - READ_FEATURE(OmpDefaultmapClause::VariableCategory) - READ_FEATURE(OmpDependClause) - READ_FEATURE(OmpDependClause::InOut) - READ_FEATURE(OmpDependClause::Sink) - READ_FEATURE(OmpDependClause::Source) - READ_FEATURE(OmpDependenceType) - READ_FEATURE(OmpDependenceType::Type) - READ_FEATURE(OmpDependSinkVec) - READ_FEATURE(OmpDependSinkVecLength) - READ_FEATURE(OmpEndAtomic) - READ_FEATURE(OmpEndBlockDirective) - READ_FEATURE(OmpEndCriticalDirective) - READ_FEATURE(OmpEndLoopDirective) - READ_FEATURE(OmpEndSectionsDirective) - READ_FEATURE(OmpIfClause) - READ_FEATURE(OmpIfClause::DirectiveNameModifier) - READ_FEATURE(OmpLinearClause) - READ_FEATURE(OmpLinearClause::WithModifier) - READ_FEATURE(OmpLinearClause::WithoutModifier) - READ_FEATURE(OmpLinearModifier) - READ_FEATURE(OmpLinearModifier::Type) - READ_FEATURE(OmpLoopDirective) - READ_FEATURE(OmpMapClause) - READ_FEATURE(OmpMapType) - READ_FEATURE(OmpMapType::Always) - READ_FEATURE(OmpMapType::Type) - READ_FEATURE(OmpObject) - READ_FEATURE(OmpObjectList) - READ_FEATURE(OmpOrderClause) - READ_FEATURE(OmpOrderClause::Type) - READ_FEATURE(OmpOrderModifier) - READ_FEATURE(OmpOrderModifier::Kind) - READ_FEATURE(OmpProcBindClause) - READ_FEATURE(OmpProcBindClause::Type) - READ_FEATURE(OmpReductionClause) - READ_FEATURE(OmpInReductionClause) - READ_FEATURE(OmpReductionCombiner) - READ_FEATURE(OmpReductionCombiner::FunctionCombiner) - READ_FEATURE(OmpReductionInitializerClause) - READ_FEATURE(OmpReductionOperator) - READ_FEATURE(OmpAllocateClause) - READ_FEATURE(OmpAllocateClause::Allocator) - READ_FEATURE(OmpScheduleClause) - READ_FEATURE(OmpScheduleClause::ScheduleType) - READ_FEATURE(OmpDeviceClause) - READ_FEATURE(OmpDeviceClause::DeviceModifier) - READ_FEATURE(OmpDeviceTypeClause) - READ_FEATURE(OmpDeviceTypeClause::Type) - READ_FEATURE(OmpScheduleModifier) - READ_FEATURE(OmpScheduleModifier::Modifier1) - READ_FEATURE(OmpScheduleModifier::Modifier2) - READ_FEATURE(OmpScheduleModifierType) - READ_FEATURE(OmpScheduleModifierType::ModType) - READ_FEATURE(OmpSectionBlocks) - READ_FEATURE(OmpSectionsDirective) - READ_FEATURE(OmpSimpleStandaloneDirective) - READ_FEATURE(Only) - READ_FEATURE(OpenACCAtomicConstruct) - READ_FEATURE(OpenACCBlockConstruct) - READ_FEATURE(OpenACCCacheConstruct) - READ_FEATURE(OpenACCCombinedConstruct) - READ_FEATURE(OpenACCConstruct) - READ_FEATURE(OpenACCDeclarativeConstruct) - READ_FEATURE(OpenACCLoopConstruct) - READ_FEATURE(OpenACCRoutineConstruct) - READ_FEATURE(OpenACCStandaloneDeclarativeConstruct) - READ_FEATURE(OpenACCStandaloneConstruct) - READ_FEATURE(OpenACCWaitConstruct) - READ_FEATURE(OpenMPAtomicConstruct) - READ_FEATURE(OpenMPBlockConstruct) - READ_FEATURE(OpenMPCancelConstruct) - READ_FEATURE(OpenMPCancelConstruct::If) - READ_FEATURE(OpenMPCancellationPointConstruct) - READ_FEATURE(OpenMPConstruct) - READ_FEATURE(OpenMPCriticalConstruct) - READ_FEATURE(OpenMPDeclarativeAllocate) - READ_FEATURE(OpenMPDeclarativeConstruct) - READ_FEATURE(OpenMPDeclareReductionConstruct) - READ_FEATURE(OpenMPDeclareSimdConstruct) - READ_FEATURE(OpenMPDeclareTargetConstruct) - READ_FEATURE(OmpMemoryOrderClause) - 
READ_FEATURE(OmpAtomicClause) - READ_FEATURE(OmpAtomicClauseList) - READ_FEATURE(OmpAtomicDefaultMemOrderClause) - READ_FEATURE(OmpAtomicDefaultMemOrderClause::Type) - READ_FEATURE(OpenMPFlushConstruct) - READ_FEATURE(OpenMPLoopConstruct) - READ_FEATURE(OpenMPExecutableAllocate) - READ_FEATURE(OpenMPRequiresConstruct) - READ_FEATURE(OpenMPSimpleStandaloneConstruct) - READ_FEATURE(OpenMPStandaloneConstruct) - READ_FEATURE(OpenMPSectionConstruct) - READ_FEATURE(OpenMPSectionsConstruct) - READ_FEATURE(OpenMPThreadprivate) - READ_FEATURE(OpenStmt) - READ_FEATURE(Optional) - READ_FEATURE(OptionalStmt) - READ_FEATURE(OtherSpecificationStmt) - READ_FEATURE(OutputImpliedDo) - READ_FEATURE(OutputItem) - READ_FEATURE(Parameter) - READ_FEATURE(ParameterStmt) - READ_FEATURE(ParentIdentifier) - READ_FEATURE(Pass) - READ_FEATURE(PauseStmt) - READ_FEATURE(Pointer) - READ_FEATURE(PointerAssignmentStmt) - READ_FEATURE(PointerAssignmentStmt::Bounds) - READ_FEATURE(PointerDecl) - READ_FEATURE(PointerObject) - READ_FEATURE(PointerStmt) - READ_FEATURE(PositionOrFlushSpec) - READ_FEATURE(PrefixSpec) - READ_FEATURE(PrefixSpec::Elemental) - READ_FEATURE(PrefixSpec::Impure) - READ_FEATURE(PrefixSpec::Module) - READ_FEATURE(PrefixSpec::Non_Recursive) - READ_FEATURE(PrefixSpec::Pure) - READ_FEATURE(PrefixSpec::Recursive) - READ_FEATURE(PrintStmt) - READ_FEATURE(PrivateStmt) - READ_FEATURE(PrivateOrSequence) - READ_FEATURE(ProcAttrSpec) - READ_FEATURE(ProcComponentAttrSpec) - READ_FEATURE(ProcComponentDefStmt) - READ_FEATURE(ProcComponentRef) - READ_FEATURE(ProcDecl) - READ_FEATURE(ProcInterface) - READ_FEATURE(ProcPointerInit) - READ_FEATURE(ProcedureDeclarationStmt) - READ_FEATURE(ProcedureDesignator) - READ_FEATURE(ProcedureStmt) - READ_FEATURE(ProcedureStmt::Kind) - READ_FEATURE(Program) - READ_FEATURE(ProgramStmt) - READ_FEATURE(ProgramUnit) - READ_FEATURE(Protected) - READ_FEATURE(ProtectedStmt) - READ_FEATURE(ReadStmt) - READ_FEATURE(RealLiteralConstant) - READ_FEATURE(RealLiteralConstant::Real) - READ_FEATURE(Rename) - READ_FEATURE(Rename::Names) - READ_FEATURE(Rename::Operators) - READ_FEATURE(ReturnStmt) - READ_FEATURE(RewindStmt) - READ_FEATURE(Save) - READ_FEATURE(SaveStmt) - READ_FEATURE(SavedEntity) - READ_FEATURE(SavedEntity::Kind) - READ_FEATURE(SectionSubscript) - READ_FEATURE(SelectCaseStmt) - READ_FEATURE(SelectRankCaseStmt) - READ_FEATURE(SelectRankCaseStmt::Rank) - READ_FEATURE(SelectRankConstruct) - READ_FEATURE(SelectRankConstruct::RankCase) - READ_FEATURE(SelectRankStmt) - READ_FEATURE(SelectTypeConstruct) - READ_FEATURE(SelectTypeConstruct::TypeCase) - READ_FEATURE(SelectTypeStmt) - READ_FEATURE(Selector) - READ_FEATURE(SeparateModuleSubprogram) - READ_FEATURE(SequenceStmt) - READ_FEATURE(Sign) - READ_FEATURE(SignedComplexLiteralConstant) - READ_FEATURE(SignedIntLiteralConstant) - READ_FEATURE(SignedRealLiteralConstant) - READ_FEATURE(SpecificationConstruct) - READ_FEATURE(SpecificationExpr) - READ_FEATURE(SpecificationPart) - READ_FEATURE(Star) - READ_FEATURE(StatOrErrmsg) - READ_FEATURE(StatVariable) - READ_FEATURE(StatusExpr) - READ_FEATURE(StmtFunctionStmt) - READ_FEATURE(StopCode) - READ_FEATURE(StopStmt) - READ_FEATURE(StopStmt::Kind) - READ_FEATURE(StructureComponent) - READ_FEATURE(StructureConstructor) - READ_FEATURE(StructureDef) - READ_FEATURE(StructureDef::EndStructureStmt) - READ_FEATURE(StructureField) - READ_FEATURE(StructureStmt) - READ_FEATURE(Submodule) - READ_FEATURE(SubmoduleStmt) - READ_FEATURE(SubroutineStmt) - READ_FEATURE(SubroutineSubprogram) - 
READ_FEATURE(SubscriptTriplet) - READ_FEATURE(Substring) - READ_FEATURE(SubstringInquiry) - READ_FEATURE(SubstringRange) - READ_FEATURE(Suffix) - READ_FEATURE(SyncAllStmt) - READ_FEATURE(SyncImagesStmt) - READ_FEATURE(SyncImagesStmt::ImageSet) - READ_FEATURE(SyncMemoryStmt) - READ_FEATURE(SyncTeamStmt) - READ_FEATURE(Target) - READ_FEATURE(TargetStmt) - READ_FEATURE(TypeAttrSpec) - READ_FEATURE(TypeAttrSpec::BindC) - READ_FEATURE(TypeAttrSpec::Extends) - READ_FEATURE(TypeBoundGenericStmt) - READ_FEATURE(TypeBoundProcBinding) - READ_FEATURE(TypeBoundProcDecl) - READ_FEATURE(TypeBoundProcedurePart) - READ_FEATURE(TypeBoundProcedureStmt) - READ_FEATURE(TypeBoundProcedureStmt::WithInterface) - READ_FEATURE(TypeBoundProcedureStmt::WithoutInterface) - READ_FEATURE(TypeDeclarationStmt) - READ_FEATURE(TypeGuardStmt) - READ_FEATURE(TypeGuardStmt::Guard) - READ_FEATURE(TypeParamDecl) - READ_FEATURE(TypeParamDefStmt) - READ_FEATURE(common::TypeParamAttr) - READ_FEATURE(TypeParamSpec) - READ_FEATURE(TypeParamValue) - READ_FEATURE(TypeParamValue::Deferred) - READ_FEATURE(TypeSpec) - READ_FEATURE(Union) - READ_FEATURE(Union::EndUnionStmt) - READ_FEATURE(Union::UnionStmt) - READ_FEATURE(UnlockStmt) - READ_FEATURE(UseStmt) - READ_FEATURE(UseStmt::ModuleNature) - READ_FEATURE(Value) - READ_FEATURE(ValueStmt) - READ_FEATURE(Variable) - READ_FEATURE(Verbatim) - READ_FEATURE(Volatile) - READ_FEATURE(VolatileStmt) - READ_FEATURE(WaitSpec) - READ_FEATURE(WaitStmt) - READ_FEATURE(WhereBodyConstruct) - READ_FEATURE(WhereConstruct) - READ_FEATURE(WhereConstruct::Elsewhere) - READ_FEATURE(WhereConstruct::MaskedElsewhere) - READ_FEATURE(WhereConstructStmt) - READ_FEATURE(WhereStmt) - READ_FEATURE(WriteStmt) - - READ_FEATURE(llvm::omp::Directive) - READ_FEATURE(llvm::omp::Clause) - READ_FEATURE(llvm::acc::Directive) - READ_FEATURE(llvm::acc::DefaultValue) - - template bool Pre(const A &) { return true; } - template void Post(const A &) {} - - template bool Pre(const Statement &) { return true; } - template void Post(const Statement &) {} - - template bool Pre(const UnlabeledStatement &) { return true; } - template void Post(const UnlabeledStatement &) {} - - template bool Pre(const common::Indirection &) { - return true; - } - template void Post(const common::Indirection &) {} - - template bool Pre(const Scalar &) { return true; } - template void Post(const Scalar &) {} - - template bool Pre(const Constant &) { return true; } - template void Post(const Constant &) {} - - template bool Pre(const Integer &) { return true; } - template void Post(const Integer &) {} - - template bool Pre(const Logical &) { return true; } - template void Post(const Logical &) {} - - template bool Pre(const DefaultChar &) { return true; } - template void Post(const DefaultChar &) {} - - template bool Pre(const std::tuple &) { return true; } - template void Post(const std::tuple &) {} - - template bool Pre(const std::variant &) { return true; } - template void Post(const std::variant &) {} -}; - -class FeatureListAction : public PluginParseTreeAction { - void executeAction() override { - NodeVisitor visitor; - Fortran::parser::Walk(getParsing().parseTree(), visitor); - - for (auto const &[feature, frequency] : visitor.getFrequencies()) { - llvm::outs() << feature << ": " << frequency << "\n"; - } - } - - bool beginSourceFileAction() override { return runPrescan() && runParse(); } -}; - -static FrontendPluginRegistry::Add X( - "feature-list", "List program features"); diff --git a/flang/include/flang/Semantics/runtime-type-info.h 
b/flang/include/flang/Semantics/runtime-type-info.h index 76560b98b1c20..e27091cf32de0 100644 --- a/flang/include/flang/Semantics/runtime-type-info.h +++ b/flang/include/flang/Semantics/runtime-type-info.h @@ -42,6 +42,14 @@ RuntimeDerivedTypeTables BuildRuntimeDerivedTypeTables(SemanticsContext &); /// to describe other derived types at runtime in flang descriptor. constexpr char typeInfoBuiltinModule[]{"__fortran_type_info"}; +/// Name of the bindings descriptor component in the DerivedType type of the +/// __Fortran_type_info module +constexpr char bindingDescCompName[]{"binding"}; + +/// Name of the __builtin_c_funptr component in the Binding type of the +/// __Fortran_type_info module +constexpr char procCompName[]{"proc"}; + SymbolVector CollectBindings(const Scope &dtScope); } // namespace Fortran::semantics diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 6b4591789c545..8ea8fa7290372 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -104,10 +104,8 @@ template class FIROpConversion : public mlir::ConvertOpToLLVMPattern { public: explicit FIROpConversion(fir::LLVMTypeConverter &lowering, - const fir::FIRToLLVMPassOptions &options, - const fir::BindingTables &bindingTables) - : mlir::ConvertOpToLLVMPattern(lowering), options(options), - bindingTables(bindingTables) {} + const fir::FIRToLLVMPassOptions &options) + : mlir::ConvertOpToLLVMPattern(lowering), options(options) {} protected: mlir::Type convertType(mlir::Type ty) const { @@ -358,7 +356,6 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern { } const fir::FIRToLLVMPassOptions &options; - const fir::BindingTables &bindingTables; }; /// FIR conversion pattern template @@ -970,131 +967,6 @@ struct ConvertOpConversion : public FIROpConversion { } }; -/// Lower `fir.dispatch` operation. A virtual call to a method in a dispatch -/// table. -struct DispatchOpConversion : public FIROpConversion { - using FIROpConversion::FIROpConversion; - - mlir::LogicalResult - matchAndRewrite(fir::DispatchOp dispatch, OpAdaptor adaptor, - mlir::ConversionPatternRewriter &rewriter) const override { - mlir::Location loc = dispatch.getLoc(); - - if (bindingTables.empty()) - return emitError(loc) << "no binding tables found"; - - // Get derived type information. - mlir::Type declaredType = - fir::getDerivedType(dispatch.getObject().getType().getEleTy()); - assert(declaredType.isa() && "expecting fir.type"); - auto recordType = declaredType.dyn_cast(); - - // Lookup for the binding table. - auto bindingsIter = bindingTables.find(recordType.getName()); - if (bindingsIter == bindingTables.end()) - return emitError(loc) - << "cannot find binding table for " << recordType.getName(); - - // Lookup for the binding. 
- const fir::BindingTable &bindingTable = bindingsIter->second; - auto bindingIter = bindingTable.find(dispatch.getMethod()); - if (bindingIter == bindingTable.end()) - return emitError(loc) - << "cannot find binding for " << dispatch.getMethod(); - unsigned bindingIdx = bindingIter->second; - - mlir::Value passedObject = dispatch.getObject(); - - auto module = dispatch.getOperation()->getParentOfType(); - mlir::Type typeDescTy; - std::string typeDescName = - fir::NameUniquer::getTypeDescriptorName(recordType.getName()); - if (auto global = module.lookupSymbol(typeDescName)) { - typeDescTy = convertType(global.getType()); - } else if (auto global = - module.lookupSymbol(typeDescName)) { - // The global may have already been translated to LLVM. - typeDescTy = global.getType(); - } - - unsigned typeDescFieldId = getTypeDescFieldId(passedObject.getType()); - - auto descPtr = adaptor.getOperands()[0] - .getType() - .dyn_cast(); - - // TODO: the following loads from the type descriptor related - // data structures must have proper TBAA access tags. - // These loads cannot alias with any real data accesses nor - // with any box accesses. Moreover, they can probably be marked - // as reading from constant memory (fourth operand of a TBAA - // tag may be set to true). These accesses probably deserve - // separate sub-root in the TBAA graph. - - // Load the descriptor. - auto desc = rewriter.create( - loc, descPtr.getElementType(), adaptor.getOperands()[0]); - - // Load the type descriptor. - auto typeDescPtr = - rewriter.create(loc, desc, typeDescFieldId); - auto typeDesc = - rewriter.create(loc, typeDescTy, typeDescPtr); - - // Load the bindings descriptor. - auto typeDescStructTy = typeDescTy.dyn_cast(); - auto bindingDescType = - typeDescStructTy.getBody()[0].dyn_cast(); - auto bindingDesc = - rewriter.create(loc, typeDesc, 0); - - // Load the correct binding. - auto bindingType = - bindingDescType.getBody()[0].dyn_cast(); - auto baseBindingPtr = rewriter.create( - loc, bindingDesc, kAddrPosInBox); - auto bindingPtr = rewriter.create( - loc, bindingType, baseBindingPtr, - llvm::ArrayRef{static_cast(bindingIdx)}); - auto binding = rewriter.create( - loc, bindingType.getElementType(), bindingPtr); - - // Get the function type. - llvm::SmallVector argTypes; - for (mlir::Value operand : adaptor.getOperands().drop_front()) - argTypes.push_back(operand.getType()); - mlir::Type resultType; - if (dispatch.getResults().empty()) - resultType = mlir::LLVM::LLVMVoidType::get(dispatch.getContext()); - else - resultType = convertType(dispatch.getResults()[0].getType()); - auto fctType = mlir::LLVM::LLVMFunctionType::get(resultType, argTypes, - /*isVarArg=*/false); - - // Get the function pointer. - auto builtinFuncPtr = - rewriter.create(loc, binding, 0); - auto funcAddr = - rewriter.create(loc, builtinFuncPtr, 0); - auto funcPtr = rewriter.create( - loc, mlir::LLVM::LLVMPointerType::get(fctType), funcAddr); - - // Indirect calls are done with the function pointer as the first operand. - llvm::SmallVector args; - args.push_back(funcPtr); - for (mlir::Value operand : adaptor.getOperands().drop_front()) - args.push_back(operand); - auto callOp = rewriter.replaceOpWithNewOp( - dispatch, - dispatch.getResults().empty() ? mlir::TypeRange{} - : fctType.getReturnType(), - "", args); - callOp.removeCalleeAttr(); // Indirect calls do not have callee attr. - - return mlir::success(); - } -}; - /// `fir.disptach_table` operation has no specific CodeGen. 
The operation is /// only used to carry information during FIR to FIR passes. struct DispatchTableOpConversion @@ -3656,9 +3528,8 @@ struct NegcOpConversion : public FIROpConversion { template struct MustBeDeadConversion : public FIROpConversion { explicit MustBeDeadConversion(fir::LLVMTypeConverter &lowering, - const fir::FIRToLLVMPassOptions &options, - const fir::BindingTables &bindingTables) - : FIROpConversion(lowering, options, bindingTables) {} + const fir::FIRToLLVMPassOptions &options) + : FIROpConversion(lowering, options) {} using OpAdaptor = typename FromOp::Adaptor; mlir::LogicalResult @@ -3768,9 +3639,6 @@ class FIRToLLVMLowering if (mlir::failed(runPipeline(mathConvertionPM, mod))) return signalPassFailure(); - fir::BindingTables bindingTables; - fir::buildBindingTables(bindingTables, mod); - auto *context = getModule().getContext(); fir::LLVMTypeConverter typeConverter{getModule(), options.applyTBAA || applyTBAA}; @@ -3783,11 +3651,11 @@ class FIRToLLVMLowering BoxProcHostOpConversion, BoxRankOpConversion, BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion, CmpcOpConversion, ConstcOpConversion, ConvertOpConversion, CoordinateOpConversion, - DispatchOpConversion, DispatchTableOpConversion, DTEntryOpConversion, - DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion, - EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion, - FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion, - GlobalOpConversion, HasValueOpConversion, InsertOnRangeOpConversion, + DispatchTableOpConversion, DTEntryOpConversion, DivcOpConversion, + EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion, + ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion, + FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion, + HasValueOpConversion, InsertOnRangeOpConversion, InsertValueOpConversion, IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion, NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion, @@ -3797,7 +3665,7 @@ class FIRToLLVMLowering SubcOpConversion, TypeDescOpConversion, UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion, - ZeroOpConversion>(typeConverter, options, bindingTables); + ZeroOpConversion>(typeConverter, options); mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern); mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern); mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern); diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp index f7ee2c19d45ac..2f8cdf7934436 100644 --- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp @@ -6,18 +6,25 @@ // //===----------------------------------------------------------------------===// +#include "flang/Lower/BuiltinModules.h" +#include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" +#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Support/TypeCode.h" +#include "flang/Optimizer/Support/Utils.h" #include 
"flang/Optimizer/Transforms/Passes.h" #include "flang/Runtime/derived-api.h" +#include "flang/Semantics/runtime-type-info.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallSet.h" @@ -72,6 +79,147 @@ class SelectTypeConv : public OpConversionPattern { std::mutex *moduleMutex; }; +/// Lower `fir.dispatch` operation. A virtual call to a method in a dispatch +/// table. +struct DispatchOpConv : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + DispatchOpConv(mlir::MLIRContext *ctx, const BindingTables &bindingTables) + : mlir::OpConversionPattern(ctx), + bindingTables(bindingTables) {} + + mlir::LogicalResult + matchAndRewrite(fir::DispatchOp dispatch, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Location loc = dispatch.getLoc(); + + if (bindingTables.empty()) + return emitError(loc) << "no binding tables found"; + + // Get derived type information. + mlir::Type declaredType = + fir::getDerivedType(dispatch.getObject().getType().getEleTy()); + assert(declaredType.isa() && "expecting fir.type"); + auto recordType = declaredType.dyn_cast(); + + // Lookup for the binding table. + auto bindingsIter = bindingTables.find(recordType.getName()); + if (bindingsIter == bindingTables.end()) + return emitError(loc) + << "cannot find binding table for " << recordType.getName(); + + // Lookup for the binding. + const BindingTable &bindingTable = bindingsIter->second; + auto bindingIter = bindingTable.find(dispatch.getMethod()); + if (bindingIter == bindingTable.end()) + return emitError(loc) + << "cannot find binding for " << dispatch.getMethod(); + unsigned bindingIdx = bindingIter->second; + + mlir::Value passedObject = dispatch.getObject(); + + auto module = dispatch.getOperation()->getParentOfType(); + Type typeDescTy; + std::string typeDescName = + NameUniquer::getTypeDescriptorName(recordType.getName()); + if (auto global = module.lookupSymbol(typeDescName)) { + typeDescTy = global.getType(); + } + + // clang-format off + // Before: + // fir.dispatch "proc1"(%11 : + // !fir.class>>) + + // After: + // %12 = fir.box_tdesc %11 : (!fir.class>>) -> !fir.tdesc + // %13 = fir.convert %12 : (!fir.tdesc) -> !fir.ref> + // %14 = fir.field_index binding, !fir.type<_QM__fortran_type_infoTderivedtype> + // %15 = fir.coordinate_of %13, %14 : (!fir.ref>, !fir.field) -> !fir.ref>>>> + // %bindings = fir.load %15 : !fir.ref>>>> + // %16 = fir.box_addr %bindings : (!fir.box>>>) -> !fir.ptr>> + // %17 = fir.coordinate_of %16, %c0 : (!fir.ptr>>, index) -> !fir.ref> + // %18 = fir.field_index proc, !fir.type<_QM__fortran_type_infoTbinding> + // %19 = fir.coordinate_of %17, %18 : (!fir.ref>, !fir.field) -> !fir.ref> + // %20 = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr> + // %21 = fir.coordinate_of %19, %20 : (!fir.ref>, !fir.field) -> !fir.ref + // %22 = fir.load %21 : !fir.ref + // %23 = fir.convert %22 : (i64) -> (() -> ()) + // fir.call %23() : () -> () + // clang-format on + + // Load the descriptor. 
+ mlir::Type fieldTy = fir::FieldType::get(rewriter.getContext()); + mlir::Type tdescType = + fir::TypeDescType::get(mlir::NoneType::get(rewriter.getContext())); + mlir::Value boxDesc = + rewriter.create(loc, tdescType, passedObject); + boxDesc = rewriter.create( + loc, fir::ReferenceType::get(typeDescTy), boxDesc); + + // Load the bindings descriptor. + auto bindingsCompName = Fortran::semantics::bindingDescCompName; + fir::RecordType typeDescRecTy = typeDescTy.cast(); + mlir::Value field = rewriter.create( + loc, fieldTy, bindingsCompName, typeDescRecTy, mlir::ValueRange{}); + mlir::Type coorTy = + fir::ReferenceType::get(typeDescRecTy.getType(bindingsCompName)); + mlir::Value bindingBoxAddr = + rewriter.create(loc, coorTy, boxDesc, field); + mlir::Value bindingBox = rewriter.create(loc, bindingBoxAddr); + + // Load the correct binding. + mlir::Value bindings = rewriter.create(loc, bindingBox); + fir::RecordType bindingTy = + fir::unwrapIfDerived(bindingBox.getType().cast()); + mlir::Type bindingAddrTy = fir::ReferenceType::get(bindingTy); + mlir::Value bindingIdxVal = rewriter.create( + loc, rewriter.getIndexType(), rewriter.getIndexAttr(bindingIdx)); + mlir::Value bindingAddr = rewriter.create( + loc, bindingAddrTy, bindings, bindingIdxVal); + + // Get the function pointer. + auto procCompName = Fortran::semantics::procCompName; + mlir::Value procField = rewriter.create( + loc, fieldTy, procCompName, bindingTy, mlir::ValueRange{}); + fir::RecordType procTy = + bindingTy.getType(procCompName).cast(); + mlir::Type procRefTy = fir::ReferenceType::get(procTy); + mlir::Value procRef = rewriter.create( + loc, procRefTy, bindingAddr, procField); + + auto addressFieldName = Fortran::lower::builtin::cptrFieldName; + mlir::Value addressField = rewriter.create( + loc, fieldTy, addressFieldName, procTy, mlir::ValueRange{}); + mlir::Type addressTy = procTy.getType(addressFieldName); + mlir::Type addressRefTy = fir::ReferenceType::get(addressTy); + mlir::Value addressRef = rewriter.create( + loc, addressRefTy, procRef, addressField); + mlir::Value address = rewriter.create(loc, addressRef); + + // Get the function type. + llvm::SmallVector argTypes; + for (mlir::Value operand : dispatch.getArgs()) + argTypes.push_back(operand.getType()); + llvm::SmallVector resTypes; + if (!dispatch.getResults().empty()) + resTypes.push_back(dispatch.getResults()[0].getType()); + + mlir::Type funTy = + mlir::FunctionType::get(rewriter.getContext(), argTypes, resTypes); + mlir::Value funcPtr = rewriter.create(loc, funTy, address); + + // Make the call. + llvm::SmallVector args{funcPtr}; + args.append(dispatch.getArgs().begin(), dispatch.getArgs().end()); + rewriter.replaceOpWithNewOp(dispatch, resTypes, nullptr, args); + return mlir::success(); + } + +private: + BindingTables bindingTables; +}; + /// Convert FIR structured control flow ops to CFG ops. 
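+/// The pass also lowers fir.dispatch operations into indirect calls that go
+/// through the binding table held in the type descriptor (see DispatchOpConv
+/// above).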
class PolymorphicOpConversion : public fir::impl::PolymorphicOpConversionBase { @@ -83,14 +231,21 @@ class PolymorphicOpConversion void runOnOperation() override { auto *context = &getContext(); + auto mod = getOperation()->getParentOfType(); mlir::RewritePatternSet patterns(context); + + BindingTables bindingTables; + buildBindingTables(bindingTables, mod); + patterns.insert(context, moduleMutex); + patterns.insert(context, bindingTables); mlir::ConversionTarget target(*context); target.addLegalDialect(); // apply the patterns target.addIllegalOp(); + target.addIllegalOp(); target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); if (mlir::failed(mlir::applyPartialConversion(getOperation(), target, std::move(patterns)))) { diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp index 29f63524b5c07..5e57c70c42fbb 100644 --- a/flang/lib/Semantics/runtime-type-info.cpp +++ b/flang/lib/Semantics/runtime-type-info.cpp @@ -151,7 +151,8 @@ RuntimeTableBuilder::RuntimeTableBuilder( : context_{c}, tables_{t}, derivedTypeSchema_{GetSchema("derivedtype")}, componentSchema_{GetSchema("component")}, procPtrSchema_{GetSchema( "procptrcomponent")}, - valueSchema_{GetSchema("value")}, bindingSchema_{GetSchema("binding")}, + valueSchema_{GetSchema("value")}, bindingSchema_{GetSchema( + bindingDescCompName)}, specialSchema_{GetSchema("specialbinding")}, deferredEnum_{GetEnumValue( "deferred")}, explicitEnum_{GetEnumValue("explicit")}, lenParameterEnum_{GetEnumValue( @@ -562,7 +563,7 @@ const Symbol *RuntimeTableBuilder::DescribeType(Scope &dtScope) { if (!isAbstractType) { std::vector bindings{ DescribeBindings(dtScope, scope)}; - AddValue(dtValues, derivedTypeSchema_, "binding"s, + AddValue(dtValues, derivedTypeSchema_, bindingDescCompName, SaveDerivedPointerTarget(scope, SaveObjectName(".v."s + distinctName), std::move(bindings), evaluate::ConstantSubscripts{ @@ -982,7 +983,7 @@ RuntimeTableBuilder::DescribeBindings(const Scope &dtScope, Scope &scope) { std::vector result; for (const SymbolRef &ref : CollectBindings(dtScope)) { evaluate::StructureConstructorValues values; - AddValue(values, bindingSchema_, "proc"s, + AddValue(values, bindingSchema_, procCompName, SomeExpr{evaluate::ProcedureDesignator{ ref.get().get().symbol()}}); AddValue(values, bindingSchema_, "name"s, @@ -1152,7 +1153,7 @@ void RuntimeTableBuilder::DescribeSpecialProc( values, specialSchema_, "which"s, SomeExpr{std::move(which.value())}); AddValue(values, specialSchema_, "isargdescriptorset"s, IntExpr<1>(isArgDescriptorSet)); - AddValue(values, specialSchema_, "proc"s, + AddValue(values, specialSchema_, procCompName, SomeExpr{evaluate::ProcedureDesignator{specific}}); // index might already be present in the case of an override specials.emplace(*index, diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 7d96a72e5f36d..4de1036dfb52b 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -79,7 +79,6 @@ if (LLVM_BUILD_EXAMPLES) list(APPEND FLANG_TEST_DEPENDS flangPrintFunctionNames flangOmpReport - flangFeatureList ) endif () diff --git a/flang/test/Examples/feature-list-class.f90 b/flang/test/Examples/feature-list-class.f90 deleted file mode 100644 index cba361b677f2a..0000000000000 --- a/flang/test/Examples/feature-list-class.f90 +++ /dev/null @@ -1,88 +0,0 @@ -! UNSUPPORTED: system-windows -! REQUIRES: plugins, shell, examples - -! RUN: %flang_fc1 -load %llvmshlibdir/flangFeatureList%pluginext \ -! 
RUN: -plugin feature-list %s 2>&1 | FileCheck %s - -module list_features_test - implicit none - - type :: test_class_1 - integer :: a - real :: b - contains - procedure :: sum => sum_test_class_1 - procedure :: set => set_values_test_class_1 - end type -contains - real function sum_test_class_1(self) - class(test_class_1), intent(in) :: self - sum_test_class_1 = self%a + self%b - end function - - subroutine set_values_test_class_1(self, a, b) - class(test_class_1), intent(out) :: self - integer, intent(in) :: a, b - self%a = a - self%b = b - end subroutine -end module list_features_test - -! CHECK: Name: 32 -! CHECK-NEXT: DerivedTypeSpec: 2 -! CHECK-NEXT: Expr::Add: 1 -! CHECK-NEXT: IntrinsicTypeSpec: 4 -! CHECK-NEXT: IntegerTypeSpec: 2 -! CHECK-NEXT: IntrinsicTypeSpec::Real: 2 -! CHECK-NEXT: DataRef: 11 -! CHECK-NEXT: StructureComponent: 4 -! CHECK-NEXT: Designator: 7 -! CHECK-NEXT: Expr: 5 -! CHECK-NEXT: Variable: 3 -! CHECK-NEXT: AttrSpec: 3 -! CHECK-NEXT: IntentSpec: 3 -! CHECK-NEXT: IntentSpec::Intent: 3 -! CHECK-NEXT: DummyArg: 3 -! CHECK-NEXT: DeclarationTypeSpec: 6 -! CHECK-NEXT: DeclarationTypeSpec::Class: 2 -! CHECK-NEXT: ImplicitStmt: 1 -! CHECK-NEXT: ImplicitPart: 3 -! CHECK-NEXT: ImplicitPartStmt: 1 -! CHECK-NEXT: PrefixSpec: 1 -! CHECK-NEXT: Module: 1 -! CHECK-NEXT: AssignmentStmt: 3 -! CHECK-NEXT: ActionStmt: 3 -! CHECK-NEXT: Block: 2 -! CHECK-NEXT: TypeBoundProcBinding: 2 -! CHECK-NEXT: TypeBoundProcedureStmt: 2 -! CHECK-NEXT: TypeBoundProcDecl: 2 -! CHECK-NEXT: TypeBoundProcedureStmt::WithoutInterface: 2 -! CHECK-NEXT: ComponentOrFill: 2 -! CHECK-NEXT: ComponentDecl: 2 -! CHECK-NEXT: DataComponentDefStmt: 2 -! CHECK-NEXT: ComponentDefStmt: 2 -! CHECK-NEXT: TypeBoundProcedurePart: 1 -! CHECK-NEXT: ContainsStmt: 2 -! CHECK-NEXT: EndTypeStmt: 1 -! CHECK-NEXT: DerivedTypeDef: 1 -! CHECK-NEXT: DerivedTypeStmt: 1 -! CHECK-NEXT: EntityDecl: 4 -! CHECK-NEXT: SpecificationConstruct: 4 -! CHECK-NEXT: TypeDeclarationStmt: 3 -! CHECK-NEXT: DeclarationConstruct: 4 -! CHECK-NEXT: EndFunctionStmt: 1 -! CHECK-NEXT: FunctionStmt: 1 -! CHECK-NEXT: EndSubroutineStmt: 1 -! CHECK-NEXT: SubroutineStmt: 1 -! CHECK-NEXT: ExecutionPartConstruct: 3 -! CHECK-NEXT: ExecutableConstruct: 3 -! CHECK-NEXT: SpecificationPart: 3 -! CHECK-NEXT: FunctionSubprogram: 1 -! CHECK-NEXT: ExecutionPart: 2 -! CHECK-NEXT: SubroutineSubprogram: 1 -! CHECK-NEXT: ModuleSubprogram: 2 -! CHECK-NEXT: ProgramUnit: 1 -! CHECK-NEXT: Program: 1 -! CHECK-NEXT: ModuleSubprogramPart: 1 -! CHECK-NEXT: EndModuleStmt: 1 -! CHECK-NEXT: ModuleStmt: 1 diff --git a/flang/test/Examples/feature-list-functions.f90 b/flang/test/Examples/feature-list-functions.f90 deleted file mode 100644 index a1913dda697c7..0000000000000 --- a/flang/test/Examples/feature-list-functions.f90 +++ /dev/null @@ -1,76 +0,0 @@ -! UNSUPPORTED: system-windows -! REQUIRES: plugins, shell, examples - -! RUN: %flang_fc1 -load %llvmshlibdir/flangFeatureList%pluginext \ -! RUN: -plugin feature-list %s 2>&1 | FileCheck %s - -program list_features_test - implicit none - call test_sub(test_func(2, 3), 4) -contains - subroutine test_sub(a, b) - integer, intent(in) :: a, b - print "(I0)", a + b - end subroutine - - integer function test_func(a, b) - integer, intent(in) :: a, b - test_func = a * b - end function -end program list_features_test - -! CHECK: Name: 19 -! CHECK-NEXT: IntLiteralConstant: 3 -! CHECK-NEXT: LiteralConstant: 4 -! CHECK-NEXT: CharLiteralConstant: 1 -! CHECK-NEXT: FunctionReference: 1 -! CHECK-NEXT: Call: 2 -! CHECK-NEXT: Expr::Multiply: 1 -! 
CHECK-NEXT: Expr::Add: 1 -! CHECK-NEXT: IntrinsicTypeSpec: 3 -! CHECK-NEXT: IntegerTypeSpec: 3 -! CHECK-NEXT: Format: 1 -! CHECK-NEXT: DataRef: 5 -! CHECK-NEXT: ProcedureDesignator: 2 -! CHECK-NEXT: Designator: 5 -! CHECK-NEXT: ActualArgSpec: 4 -! CHECK-NEXT: ActualArg: 4 -! CHECK-NEXT: Expr: 11 -! CHECK-NEXT: Variable: 1 -! CHECK-NEXT: AttrSpec: 2 -! CHECK-NEXT: IntentSpec: 2 -! CHECK-NEXT: IntentSpec::Intent: 2 -! CHECK-NEXT: DummyArg: 2 -! CHECK-NEXT: DeclarationTypeSpec: 3 -! CHECK-NEXT: ImplicitStmt: 1 -! CHECK-NEXT: ImplicitPart: 3 -! CHECK-NEXT: ImplicitPartStmt: 1 -! CHECK-NEXT: PrefixSpec: 1 -! CHECK-NEXT: OutputItem: 1 -! CHECK-NEXT: AssignmentStmt: 1 -! CHECK-NEXT: ActionStmt: 3 -! CHECK-NEXT: PrintStmt: 1 -! CHECK-NEXT: CallStmt: 1 -! CHECK-NEXT: Block: 3 -! CHECK-NEXT: ContainsStmt: 1 -! CHECK-NEXT: EntityDecl: 4 -! CHECK-NEXT: SpecificationConstruct: 2 -! CHECK-NEXT: TypeDeclarationStmt: 2 -! CHECK-NEXT: DeclarationConstruct: 2 -! CHECK-NEXT: EndFunctionStmt: 1 -! CHECK-NEXT: FunctionStmt: 1 -! CHECK-NEXT: EndSubroutineStmt: 1 -! CHECK-NEXT: SubroutineStmt: 1 -! CHECK-NEXT: ExecutionPartConstruct: 3 -! CHECK-NEXT: ExecutableConstruct: 3 -! CHECK-NEXT: SpecificationPart: 3 -! CHECK-NEXT: FunctionSubprogram: 1 -! CHECK-NEXT: ExecutionPart: 3 -! CHECK-NEXT: InternalSubprogramPart: 1 -! CHECK-NEXT: InternalSubprogram: 2 -! CHECK-NEXT: SubroutineSubprogram: 1 -! CHECK-NEXT: ProgramUnit: 1 -! CHECK-NEXT: MainProgram: 1 -! CHECK-NEXT: Program: 1 -! CHECK-NEXT: EndProgramStmt: 1 -! CHECK-NEXT: ProgramStmt: 1 diff --git a/flang/test/Fir/dispatch.f90 b/flang/test/Fir/dispatch.f90 index dcb52bed7d967..933c769d3e169 100644 --- a/flang/test/Fir/dispatch.f90 +++ b/flang/test/Fir/dispatch.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -polymorphic-type -emit-fir %s -o - | tco | FileCheck %s +! RUN: bbc -polymorphic-type -emit-fir %s -o - | fir-opt --fir-polymorphic-op | FileCheck %s ! RUN: bbc -polymorphic-type -emit-fir %s -o - | FileCheck %s --check-prefix=BT ! Tests codegen of fir.dispatch operation. This test is intentionally run from @@ -182,105 +182,123 @@ program test_type_to_class end -! CHECK-LABEL: define void @_QMdispatch1Pdisplay_class( -! CHECK-SAME: ptr %[[CLASS:.*]]) +! CHECK-LABEL: func.func @_QMdispatch1Pdisplay_class( +! CHECK-SAME: %[[ARG:.*]]: [[CLASS:!fir.class<.*>>]] -! CHECK-DAG: %[[INT32:.*]] = alloca i32, i64 1 -! CHECK-DAG: %[[REAL:.*]] = alloca float, i64 1 -! CHECK-DAG: %[[I:.*]] = alloca i32, i64 1 +! CHECK-DAG: %[[INT32:.*]] = fir.alloca i32 +! CHECK-DAG: %[[REAL:.*]] = fir.alloca f32 +! CHECK-DAG: %[[I:.*]] = fir.alloca i32 ! Check dynamic dispatch equal to `call p%display2()` with binding index = 2. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 2 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! 
CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c2 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> () ! Check dynamic dispatch equal to `call p%display1()` with binding index = 1. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 1 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! 
CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c1 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> () ! Check dynamic dispatch equal to `call p%aproc()` with binding index = 0. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 0 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c0 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> () ! Check dynamic dispatch of a function with result. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! 
CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 3 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: %[[RET:.*]] = call i32 %[[FUNC_PTR]](ptr %[[CLASS]]) -! CHECK: store i32 %[[RET]], ptr %[[I]] +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c3 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> i32) +! CHECK: %[[RES:.*]] = fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> i32 ! Check dynamic dispatch of call with passed-object and additional argument -! CHECK: store float 2.500000e+00, ptr %[[REAL]] -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 6 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! 
CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]], ptr %[[REAL]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c6 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]], !fir.ref) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]], %[[REAL]]) : ([[CLASS]], !fir.ref) -> () ! Check dynamic dispatch of a call with NOPASS -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 4 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]]() - -! CHECK: store i32 1, ptr %[[INT32]] -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 5 -! 
CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[INT32]], ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c4 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (() -> ()) +! CHECK: fir.call %[[FUNC_PTR]]() : () -> () + +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c5 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> ((!fir.ref, [[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[INT32]], %[[ARG]]) : (!fir.ref, [[CLASS]]) -> () ! CHECK-LABEL: _QMdispatch1Pno_pass_array ! 
CHECK-LABEL: _QMdispatch1Pno_pass_array_allocatable diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index b129c7c8cdbe1..c3c01a39b8606 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -586,9 +586,9 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[C1_LOAD]], ptr %{{.*}} ! LLVM: %[[GEP_TDESC_C1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C1:.*]] = load ptr, ptr %[[GEP_TDESC_C1]] -! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 1 +! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 ! LLVM: %[[ELEM_SIZE:.*]] = load i64, ptr %[[ELEM_SIZE_GEP]] -! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 4 +! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 4 ! LLVM: %[[TYPE_CODE:.*]] = load i32, ptr %[[TYPE_CODE_GEP]] ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 ! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[TYPE_CODE]] to i8 @@ -600,9 +600,9 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_C2]], ptr %{{.*}} ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C2:.*]] = load ptr, ptr %[[GEP_TDESC_C2]] -! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 1 +! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 ! LLVM: %[[ELEM_SIZE:.*]] = load i64, ptr %[[ELEM_SIZE_GEP]] -! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 4 +! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 4 ! LLVM: %[[TYPE_CODE:.*]] = load i32, ptr %[[TYPE_CODE_GEP]] ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 ! 
LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[TYPE_CODE]] to i8 diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 1f9b68afd95cd..fe0f4ef4826d5 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -45,7 +45,7 @@ endif() set(LIBC_GPU_TEST_ARCHITECTURE "" CACHE STRING "Architecture for the GPU tests") if(LIBC_GPU_TEST_ARCHITECTURE) message(STATUS "Using user-specified GPU architecture for testing " - "'${LIBC_GPU_TARGET_ARCHITECTURE}'") + "'${LIBC_GPU_TEST_ARCHITECTURE}'") if("${LIBC_GPU_TEST_ARCHITECTURE}" IN_LIST all_amdgpu_architectures) set(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU TRUE) set(LIBC_GPU_TARGET_TRIPLE "amdgcn-amd-amdhsa") diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index b3017338f8260..5c0b3103f5615 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -111,6 +111,8 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.remove libc.src.stdio.sprintf libc.src.stdio.snprintf + libc.src.stdio.fprintf + libc.src.stdio.printf # sys/mman.h entrypoints libc.src.sys.mman.madvise @@ -412,10 +414,8 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked - libc.src.stdio.fprintf libc.src.stdio.getc libc.src.stdio.getc_unlocked - libc.src.stdio.printf libc.src.stdio.sscanf libc.src.stdio.scanf libc.src.stdio.fscanf diff --git a/libc/src/__support/CPP/atomic.h b/libc/src/__support/CPP/atomic.h index b0e90e32dadd2..5514062525cce 100644 --- a/libc/src/__support/CPP/atomic.h +++ b/libc/src/__support/CPP/atomic.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_SUPPORT_CPP_ATOMIC_H #include "src/__support/macros/attributes.h" +#include "src/__support/macros/properties/architectures.h" #include "type_traits.h" @@ -96,7 +97,14 @@ template struct Atomic { // Issue a thread fence with the given memory ordering. LIBC_INLINE void atomic_thread_fence(MemoryOrder mem_ord) { +// The NVPTX backend currently does not support atomic thread fences so we use a +// full system fence instead. +#ifdef LIBC_TARGET_ARCH_IS_NVPTX + (void)mem_ord; + __nvvm_membar_sys(); +#else __atomic_thread_fence(int(mem_ord)); +#endif } } // namespace cpp diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h index 27d91c433ac5b..14055ab74dce0 100644 --- a/libc/src/__support/FPUtil/ManipulationFunctions.h +++ b/libc/src/__support/FPUtil/ManipulationFunctions.h @@ -112,7 +112,7 @@ LIBC_INLINE T logb(T x) { } NormalFloat normal(bits); - return normal.exponent; + return static_cast(normal.exponent); } template , int> = 0> diff --git a/libc/src/__support/FPUtil/NearestIntegerOperations.h b/libc/src/__support/FPUtil/NearestIntegerOperations.h index 8265ea1cbb3e5..06aa9484c3f70 100644 --- a/libc/src/__support/FPUtil/NearestIntegerOperations.h +++ b/libc/src/__support/FPUtil/NearestIntegerOperations.h @@ -261,9 +261,9 @@ LIBC_INLINE I rounded_float_to_signed_integer(F x) { } // For all other cases, if `x` can fit in the integer type `I`, - // we just return `x`. Implicit conversion will convert the - // floating point value to the exact integer value. - return x; + // we just return `x`. static_cast will convert the floating + // point value to the exact integer value. 
+ return static_cast(x); } } // namespace internal diff --git a/libc/src/__support/OSUtil/gpu/quick_exit.cpp b/libc/src/__support/OSUtil/gpu/quick_exit.cpp index 56f0427c8d81b..3fab438a357a5 100644 --- a/libc/src/__support/OSUtil/gpu/quick_exit.cpp +++ b/libc/src/__support/OSUtil/gpu/quick_exit.cpp @@ -27,7 +27,7 @@ void quick_exit(int status) { [](rpc::Buffer *) { /* void */ }); #if defined(LIBC_TARGET_ARCH_IS_NVPTX) - asm("exit" ::: "memory"); + asm("exit;" ::: "memory"); #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) // This will terminate the entire wavefront, may not be valid with divergent // work items. diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 78bab469c28db..bc9a5d7a237f4 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -40,30 +40,6 @@ function(add_math_entrypoint_object name) ) endfunction() -add_entrypoint_object( - fmaf - SRCS - fmaf.cpp - HDRS - fmaf.h - DEPENDS - libc.src.__support.FPUtil.fma - COMPILE_OPTIONS - -O3 -) - -add_entrypoint_object( - fma - SRCS - fma.cpp - HDRS - fma.h - DEPENDS - libc.src.__support.FPUtil.fma - COMPILE_OPTIONS - -O3 -) - add_math_entrypoint_object(acosf) add_math_entrypoint_object(acoshf) @@ -107,6 +83,9 @@ add_math_entrypoint_object(floor) add_math_entrypoint_object(floorf) add_math_entrypoint_object(floorl) +add_math_entrypoint_object(fma) +add_math_entrypoint_object(fmaf) + add_math_entrypoint_object(fmax) add_math_entrypoint_object(fmaxf) add_math_entrypoint_object(fmaxl) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 09aefc67d1b51..9fe0fce1c8a67 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1491,3 +1491,27 @@ add_entrypoint_object( COMPILE_OPTIONS -O3 ) + +add_entrypoint_object( + fmaf + SRCS + fmaf.cpp + HDRS + ../fmaf.h + DEPENDS + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( + fma + SRCS + fma.cpp + HDRS + ../fma.h + DEPENDS + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) diff --git a/libc/src/math/generic/acosf.cpp b/libc/src/math/generic/acosf.cpp index 5835dfa617056..41152e06ff1f5 100644 --- a/libc/src/math/generic/acosf.cpp +++ b/libc/src/math/generic/acosf.cpp @@ -56,8 +56,8 @@ LLVM_LIBC_FUNCTION(float, acosf, (float x)) { return r.value(); double xd = static_cast(x); - return fputil::multiply_add(-0x1.5555555555555p-3 * xd, xd * xd, - M_MATH_PI_2 - xd); + return static_cast(fputil::multiply_add( + -0x1.5555555555555p-3 * xd, xd * xd, M_MATH_PI_2 - xd)); } // For |x| <= 0.5, we approximate acosf(x) by: @@ -70,7 +70,7 @@ LLVM_LIBC_FUNCTION(float, acosf, (float x)) { double xsq = xd * xd; double x3 = xd * xsq; double r = asin_eval(xsq); - return fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd); + return static_cast(fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd)); } // |x| > 1, return NaNs. @@ -111,7 +111,7 @@ LLVM_LIBC_FUNCTION(float, acosf, (float x)) { double r3 = asin_eval(u); double r = fputil::multiply_add(cv * u, r3, cv); - return x_sign ? M_MATH_PI - r : r; + return static_cast(x_sign ? 
M_MATH_PI - r : r); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/acoshf.cpp b/libc/src/math/generic/acoshf.cpp index ac225fe5a808f..f8e5a90a4d056 100644 --- a/libc/src/math/generic/acoshf.cpp +++ b/libc/src/math/generic/acoshf.cpp @@ -68,7 +68,8 @@ LLVM_LIBC_FUNCTION(float, acoshf, (float x)) { double x_d = static_cast(x); // acosh(x) = log(x + sqrt(x^2 - 1)) - return log_eval(x_d + fputil::sqrt(fputil::multiply_add(x_d, x_d, -1.0))); + return static_cast( + log_eval(x_d + fputil::sqrt(fputil::multiply_add(x_d, x_d, -1.0)))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp index c24697cb14727..9b724d3296c84 100644 --- a/libc/src/math/generic/asinf.cpp +++ b/libc/src/math/generic/asinf.cpp @@ -99,7 +99,7 @@ LLVM_LIBC_FUNCTION(float, asinf, (float x)) { double xsq = xd * xd; double x3 = xd * xsq; double r = asin_eval(xsq); - return fputil::multiply_add(x3, r, xd); + return static_cast(fputil::multiply_add(x3, r, xd)); } // |x| > 1, return NaNs. @@ -149,7 +149,7 @@ LLVM_LIBC_FUNCTION(float, asinf, (float x)) { double c3 = c1 * u; double r = asin_eval(u); - return fputil::multiply_add(c3, r, c2); + return static_cast(fputil::multiply_add(c3, r, c2)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp index 7063387313ab8..91ecf45667bfc 100644 --- a/libc/src/math/generic/asinhf.cpp +++ b/libc/src/math/generic/asinhf.cpp @@ -27,8 +27,9 @@ LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x - : (x - 0x1.5555555555555p-3 * x * x * x); + return static_cast(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x - 0x1.5555555555555p-3 * x * x * x)); } double x_d = x; @@ -40,7 +41,7 @@ LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4, -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6, 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7); - return fputil::multiply_add(x_d, p, x_d); + return static_cast(fputil::multiply_add(x_d, p, x_d)); } const double SIGN[2] = {1.0, -1.0}; @@ -97,9 +98,10 @@ LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { } // asinh(x) = log(x + sqrt(x^2 + 1)) - return x_sign * - log_eval(fputil::multiply_add( - x_d, x_sign, fputil::sqrt(fputil::multiply_add(x_d, x_d, 1.0)))); + return static_cast( + x_sign * + log_eval(fputil::multiply_add( + x_d, x_sign, fputil::sqrt(fputil::multiply_add(x_d, x_d, 1.0))))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp index ff5d4507afa3d..ed7847adb15a1 100644 --- a/libc/src/math/generic/atanf.cpp +++ b/libc/src/math/generic/atanf.cpp @@ -22,7 +22,7 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { if (xbits.is_inf()) - return opt_barrier(sign ? -M_MATH_PI_2 : M_MATH_PI_2); + return static_cast(opt_barrier(sign ? 
-M_MATH_PI_2 : M_MATH_PI_2)); else return x; } @@ -52,7 +52,7 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { } } - return atan_eval(x); + return static_cast(atan_eval(x)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/atanhf.cpp b/libc/src/math/generic/atanhf.cpp index b0c92fa8de87d..0a4512f7622da 100644 --- a/libc/src/math/generic/atanhf.cpp +++ b/libc/src/math/generic/atanhf.cpp @@ -40,8 +40,9 @@ LLVM_LIBC_FUNCTION(float, atanhf, (float x)) { if (LIBC_UNLIKELY(x_abs <= 0x3dcc'0000U)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x - : (x + 0x1.5555555555555p-2 * x * x * x); + return static_cast(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x + 0x1.5555555555555p-2 * x * x * x)); } double xdbl = x; @@ -50,10 +51,10 @@ LLVM_LIBC_FUNCTION(float, atanhf, (float x)) { double pe = fputil::polyeval(x2, 0.0, 0x1.5555555555555p-2, 0x1.999999999999ap-3, 0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, 0x1.745d1745d1746p-4); - return fputil::multiply_add(xdbl, pe, xdbl); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); } double xdbl = x; - return 0.5 * log_eval((xdbl + 1.0) / (xdbl - 1.0)); + return static_cast(0.5 * log_eval((xdbl + 1.0) / (xdbl - 1.0))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/cosf.cpp b/libc/src/math/generic/cosf.cpp index ef94804bda60d..2e4ca3c4133ff 100644 --- a/libc/src/math/generic/cosf.cpp +++ b/libc/src/math/generic/cosf.cpp @@ -129,8 +129,8 @@ LLVM_LIBC_FUNCTION(float, cosf, (float x)) { sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - return fputil::multiply_add(sin_y, -sin_k, - fputil::multiply_add(cosm1_y, cos_k, cos_k)); + return static_cast(fputil::multiply_add( + sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp index 1ce1bc300d46b..1cf789a10a8ca 100644 --- a/libc/src/math/generic/coshf.cpp +++ b/libc/src/math/generic/coshf.cpp @@ -47,7 +47,7 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) { // but not too small inputs, such as |x| < 2^-2, or |x| < 2^-3. // cosh(x) = (e^x + e^(-x)) / 2. - return exp_pm_eval(x); + return static_cast(exp_pm_eval(x)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/exp10f.cpp b/libc/src/math/generic/exp10f.cpp index 06509a97fe032..9d07f2c5261ee 100644 --- a/libc/src/math/generic/exp10f.cpp +++ b/libc/src/math/generic/exp10f.cpp @@ -72,7 +72,7 @@ LLVM_LIBC_FUNCTION(float, exp10f, (float x)) { return fputil::multiply_add(x, 0x1.26bb1cp+1f, 1.0f); } - return Exp10Base::powb_lo(x); + return static_cast(Exp10Base::powb_lo(x)); } // Exceptional value. 
@@ -129,7 +129,7 @@ LLVM_LIBC_FUNCTION(float, exp10f, (float x)) { // 10^x = 2^(mid + hi) * 10^lo // ~ mh * (c0 + p * lo^2) // = (mh * c0) + p * (mh * lo^2) - return multiply_add(p, lo2 * rr.mh, c0 * rr.mh); + return static_cast(multiply_add(p, lo2 * rr.mh, c0 * rr.mh)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/exp2f.cpp b/libc/src/math/generic/exp2f.cpp index 3c319d288e1c9..15f35d0da82f5 100644 --- a/libc/src/math/generic/exp2f.cpp +++ b/libc/src/math/generic/exp2f.cpp @@ -128,7 +128,7 @@ LLVM_LIBC_FUNCTION(float, exp2f, (float x)) { // = 2^(hi + mid) * 2^lo // ~ mh * (1 + lo * P(lo)) // = mh + (mh*lo) * P(lo) - return fputil::multiply_add(p, dx_sq * mh, c1 * mh); + return static_cast(fputil::multiply_add(p, dx_sq * mh, c1 * mh)); } } // namespace __llvm_libc diff --git a/libc/src/math/fma.cpp b/libc/src/math/generic/fma.cpp similarity index 100% rename from libc/src/math/fma.cpp rename to libc/src/math/generic/fma.cpp diff --git a/libc/src/math/fmaf.cpp b/libc/src/math/generic/fmaf.cpp similarity index 100% rename from libc/src/math/fmaf.cpp rename to libc/src/math/generic/fmaf.cpp diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp index b2dd29f5f74d1..47569b4758a4b 100644 --- a/libc/src/math/generic/log10.cpp +++ b/libc/src/math/generic/log10.cpp @@ -978,7 +978,7 @@ LLVM_LIBC_FUNCTION(double, log10, (double x)) { // |R * x_m - 1| < C uint64_t x_u = xbits.uintval(); int shifted = x_u >> 45; - size_t index = shifted & 0x7F; + int index = shifted & 0x7F; double r = R[index]; x_e += (x_u >> 52) & 0x7FF; diff --git a/libc/src/math/generic/log1pf.cpp b/libc/src/math/generic/log1pf.cpp index bf6a91a5fc466..a7ca54887d59a 100644 --- a/libc/src/math/generic/log1pf.cpp +++ b/libc/src/math/generic/log1pf.cpp @@ -150,7 +150,7 @@ LLVM_LIBC_FUNCTION(float, log1pf, (float x)) { double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); double r = fputil::polyeval(xsq, xd, c0, c1, c2, COEFFS[6]); - return r; + return static_cast(r); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sincosf.cpp b/libc/src/math/generic/sincosf.cpp index 8448945a71d5d..277126bdc89db 100644 --- a/libc/src/math/generic/sincosf.cpp +++ b/libc/src/math/generic/sincosf.cpp @@ -193,10 +193,10 @@ LLVM_LIBC_FUNCTION(void, sincosf, (float x, float *sinp, float *cosp)) { sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - *sinp = fputil::multiply_add(sin_y, cos_k, - fputil::multiply_add(cosm1_y, sin_k, sin_k)); - *cosp = fputil::multiply_add(sin_y, -sin_k, - fputil::multiply_add(cosm1_y, cos_k, cos_k)); + *sinp = static_cast(fputil::multiply_add( + sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k))); + *cosp = static_cast(fputil::multiply_add( + sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp index 1641c44e9fc00..697c438c2c67b 100644 --- a/libc/src/math/generic/sinf.cpp +++ b/libc/src/math/generic/sinf.cpp @@ -121,7 +121,7 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) { double result = fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7, -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19); - return xd * result; + return static_cast(xd * result); } if (LIBC_UNLIKELY(x_abs == 0x4619'9998U)) { // x = 0x1.33333p13 @@ -150,8 +150,8 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) { sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - return fputil::multiply_add(sin_y, cos_k, - fputil::multiply_add(cosm1_y, sin_k, sin_k)); + 
return static_cast(fputil::multiply_add( + sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp index cc3811deeb9de..92edd4ea6a98f 100644 --- a/libc/src/math/generic/sinhf.cpp +++ b/libc/src/math/generic/sinhf.cpp @@ -21,7 +21,8 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x); + return static_cast( + LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x)); } // When |x| >= 90, or x is inf or nan @@ -65,11 +66,11 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { // Therefore, output of Sollya = x * pe; double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3, 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13); - return fputil::multiply_add(xdbl, pe, xdbl); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); } // sinh(x) = (e^x - e^(-x)) / 2. - return exp_pm_eval(x); + return static_cast(exp_pm_eval(x)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/tanf.cpp b/libc/src/math/generic/tanf.cpp index 217664f8b2acb..681f16177fde5 100644 --- a/libc/src/math/generic/tanf.cpp +++ b/libc/src/math/generic/tanf.cpp @@ -90,7 +90,7 @@ LLVM_LIBC_FUNCTION(float, tanf, (float x)) { double result = fputil::polyeval(xsq, 1.0, 0x1.555555553d022p-2, 0x1.111111ce442c1p-3, 0x1.ba180a6bbdecdp-5, 0x1.69c0a88a0b71fp-6); - return xd * result; + return static_cast(xd * result); } // Check for exceptional values @@ -134,8 +134,9 @@ LLVM_LIBC_FUNCTION(float, tanf, (float x)) { // tan(x) = sin(x) / cos(x) // = (sin_y * cos_k + cos_y * sin_k) / (cos_y * cos_k - sin_y * sin_k) using fputil::multiply_add; - return multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) / - multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k)); + return static_cast( + multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) / + multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/tanhf.cpp b/libc/src/math/generic/tanhf.cpp index e67eadf3ce4b7..3b8506f809c3b 100644 --- a/libc/src/math/generic/tanhf.cpp +++ b/libc/src/math/generic/tanhf.cpp @@ -22,8 +22,8 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x - : (x - 0x1.5555555555555p-2 * x * x * x); + return static_cast( + LIBC_UNLIKELY(x_abs == 0) ? 
x : (x - 0x1.5555555555555p-2 * x * x * x)); } // When |x| >= 15, or x is inf or nan @@ -48,7 +48,7 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { double pe = fputil::polyeval(x2, 0.0, -0x1.5555555555555p-2, 0x1.1111111111111p-3, -0x1.ba1ba1ba1ba1cp-5, 0x1.664f4882c10fap-6, -0x1.226e355e6c23dp-7); - return fputil::multiply_add(xdbl, pe, xdbl); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); } if (LIBC_UNLIKELY(xbits.bits == 0x4058'e0a3U)) { @@ -65,7 +65,7 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { fputil::multiply_add(ep.mh, r, 1.0); #else double exp_x = ep.mh * r; - return (exp_x - 1.0) / (exp_x + 1.0); + return static_cast((exp_x - 1.0) / (exp_x + 1.0)); #endif // LIBC_TARGET_CPU_HAS_FMA } diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 5f8d17953f633..7ccbf9aa28c4c 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -480,29 +480,42 @@ add_entrypoint_object( libc.src.stdio.printf_core.writer ) +list(APPEND printf_deps + libc.src.__support.arg_list + libc.src.stdio.printf_core.vfprintf_internal +) +if(LLVM_LIBC_FULL_BUILD) + list(APPEND printf_deps + libc.src.__support.File.file + libc.src.__support.File.platform_file + ) +else() + set(printf_copts "-DLIBC_COPT_PRINTF_USE_SYSTEM_FILE") +endif() + add_entrypoint_object( - fprintf + printf SRCS - fprintf.cpp + printf.cpp HDRS - fprintf.h + printf.h DEPENDS - libc.src.__support.arg_list - libc.src.stdio.printf_core.vfprintf_internal + ${printf_deps} + COMPILE_OPTIONS + ${printf_copts} ) - add_entrypoint_object( - printf + fprintf SRCS - printf.cpp + fprintf.cpp HDRS - printf.h + fprintf.h DEPENDS - libc.src.__support.File.file - libc.src.__support.File.platform_file libc.src.__support.arg_list libc.src.stdio.printf_core.vfprintf_internal + COMPILE_OPTIONS + ${printf_copts} ) add_entrypoint_object( diff --git a/libc/src/stdio/fprintf.cpp b/libc/src/stdio/fprintf.cpp index 796d5b5c47095..da8fabf5ab542 100644 --- a/libc/src/stdio/fprintf.cpp +++ b/libc/src/stdio/fprintf.cpp @@ -13,9 +13,16 @@ #include "src/stdio/printf_core/vfprintf_internal.h" #include +#include namespace __llvm_libc { +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE +using FileT = __llvm_libc::File; +#else // defined(LIBC_COPT_PRINTF_USE_SYSTEM_FILE) +using FileT = ::FILE; +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE + LLVM_LIBC_FUNCTION(int, fprintf, (::FILE *__restrict stream, const char *__restrict format, ...)) { @@ -25,7 +32,8 @@ LLVM_LIBC_FUNCTION(int, fprintf, // and pointer semantics, as well as handling // destruction automatically. 
va_end(vlist); - int ret_val = printf_core::vfprintf_internal(stream, format, args); + int ret_val = printf_core::vfprintf_internal( + reinterpret_cast(stream), format, args); return ret_val; } diff --git a/libc/src/stdio/printf.cpp b/libc/src/stdio/printf.cpp index 8fd8b9cc57fad..ca6f61ed63033 100644 --- a/libc/src/stdio/printf.cpp +++ b/libc/src/stdio/printf.cpp @@ -8,11 +8,18 @@ #include "src/stdio/printf.h" -#include "src/__support/File/file.h" #include "src/__support/arg_list.h" #include "src/stdio/printf_core/vfprintf_internal.h" #include +#include + +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE +#include "src/__support/File/file.h" +#define PRINTF_STDOUT __llvm_libc::stdout +#else // LIBC_COPT_PRINTF_USE_SYSTEM_FILE +#define PRINTF_STDOUT ::stdout +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE namespace __llvm_libc { @@ -23,8 +30,7 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { // and pointer semantics, as well as handling // destruction automatically. va_end(vlist); - int ret_val = printf_core::vfprintf_internal( - reinterpret_cast<::FILE *>(__llvm_libc::stdout), format, args); + int ret_val = printf_core::vfprintf_internal(PRINTF_STDOUT, format, args); return ret_val; } diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 31db8ad3c524c..109399772b53d 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -116,35 +116,31 @@ add_object_library( libc.src.__support.arg_list ) -if(NOT (TARGET libc.src.__support.File.file)) - # Not all platforms have a file implementation. If file is unvailable, - # then we must skip all file based printf sections. +if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD) + # Not all platforms have a file implementation. If file is unvailable, and a + # full build is requested, then we must skip all file based printf sections. return() endif() -add_object_library( +add_header_library( file_writer - SRCS - file_writer.cpp HDRS file_writer.h DEPENDS + libc.include.stdio libc.src.__support.File.file libc.src.__support.CPP.string_view libc.src.string.memory_utils.memset_implementation .core_structs ) -add_object_library( +add_header_library( vfprintf_internal - SRCS - vfprintf_internal.cpp HDRS vfprintf_internal.h DEPENDS libc.include.stdio libc.src.__support.File.file - libc.src.__support.File.platform_file libc.src.__support.arg_list libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.file_writer diff --git a/libc/src/stdio/printf_core/file_writer.cpp b/libc/src/stdio/printf_core/file_writer.cpp deleted file mode 100644 index 0e07e1c1eb8a7..0000000000000 --- a/libc/src/stdio/printf_core/file_writer.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===-- FILE Writer implementation for printf -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/stdio/printf_core/file_writer.h" -#include "src/__support/CPP/string_view.h" -#include "src/__support/File/file.h" -#include "src/stdio/printf_core/core_structs.h" -#include - -namespace __llvm_libc { -namespace printf_core { - -int FileWriter::write(const char *__restrict to_write, size_t len) { - auto result = file->write_unlocked(to_write, len); - int written = result.value; - if (written != static_cast(len) || result.has_error()) - written = FILE_WRITE_ERROR; - if (file->error_unlocked()) - written = FILE_STATUS_ERROR; - return written; -} - -int FileWriter::write_str(void *raw_pointer, cpp::string_view new_string) { - FileWriter *file_writer = reinterpret_cast(raw_pointer); - return file_writer->write(new_string.data(), new_string.size()); -} - -int FileWriter::write_chars(void *raw_pointer, char new_char, size_t len) { - FileWriter *file_writer = reinterpret_cast(raw_pointer); - constexpr size_t BUFF_SIZE = 8; - char buff[BUFF_SIZE] = {new_char}; - int result; - while (len > BUFF_SIZE) { - result = file_writer->write(buff, BUFF_SIZE); - if (result < 0) - return result; - len -= BUFF_SIZE; - } - return file_writer->write(buff, len); -} - -// TODO(michaelrj): Move this to putc_unlocked once that is available. -int FileWriter::write_char(void *raw_pointer, char new_char) { - FileWriter *file_writer = reinterpret_cast(raw_pointer); - return file_writer->write(&new_char, 1); -} - -} // namespace printf_core -} // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/file_writer.h b/libc/src/stdio/printf_core/file_writer.h index 6ba1428a160e2..0fd6d115ddd8b 100644 --- a/libc/src/stdio/printf_core/file_writer.h +++ b/libc/src/stdio/printf_core/file_writer.h @@ -11,6 +11,8 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/File/file.h" +#include "src/__support/macros/attributes.h" // For LIBC_INLINE +#include "src/stdio/printf_core/core_structs.h" #include #include @@ -18,26 +20,81 @@ namespace __llvm_libc { namespace printf_core { -class FileWriter { - __llvm_libc::File *file; +template class FileWriter { + file_t *file; public: - FileWriter(::FILE *init_file) { - file = reinterpret_cast<__llvm_libc::File *>(init_file); - file->lock(); - } + LIBC_INLINE FileWriter(file_t *init_file); - ~FileWriter() { file->unlock(); } + LIBC_INLINE ~FileWriter(); - int write(const char *__restrict to_write, size_t len); + LIBC_INLINE int write(const char *__restrict to_write, size_t len); // These write functions take a FileWriter as a void* in raw_pointer, and // call the appropriate write function on it. 
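The comment above describes the type-erasure scheme the printf core relies on: the writer only ever sees an opaque void* plus a set of callbacks, so the same core can print to an internal file, a system FILE, or a plain buffer, and the static write_* members that follow are those callbacks for FileWriter. A self-contained sketch of the same pattern with an invented StringSink target (none of these names come from the patch):

    #include <cstddef>
    #include <string>

    // The "core" side: holds an opaque sink and the callback that knows how to
    // write to it. It never learns the sink's concrete type.
    struct ErasedWriter {
      void *sink;
      int (*write_str)(void *sink, const char *s, size_t len);
    };

    // One possible sink: appends everything to a std::string.
    struct StringSink {
      std::string out;
      static int write_str(void *raw, const char *s, size_t len) {
        auto *self = static_cast<StringSink *>(raw);  // recover the real type
        self->out.append(s, len);
        return static_cast<int>(len);
      }
    };

    // Core code writes through the callback without knowing about StringSink.
    int emit_hello(ErasedWriter &w) { return w.write_str(w.sink, "hello", 5); }

    // Usage:
    //   StringSink sink;
    //   ErasedWriter w{&sink, &StringSink::write_str};
    //   emit_hello(w);   // sink.out == "hello"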
- static int write_str(void *raw_pointer, cpp::string_view new_string); - static int write_chars(void *raw_pointer, char new_char, size_t len); - static int write_char(void *raw_pointer, char new_char); + static int write_str(void *raw_pointer, cpp::string_view new_string) { + FileWriter *file_writer = reinterpret_cast(raw_pointer); + return file_writer->write(new_string.data(), new_string.size()); + } + static int write_chars(void *raw_pointer, char new_char, size_t len) { + FileWriter *file_writer = reinterpret_cast(raw_pointer); + constexpr size_t BUFF_SIZE = 8; + char buff[BUFF_SIZE] = {new_char}; + int result; + while (len > BUFF_SIZE) { + result = file_writer->write(buff, BUFF_SIZE); + if (result < 0) + return result; + len -= BUFF_SIZE; + } + return file_writer->write(buff, len); + } + static int write_char(void *raw_pointer, char new_char) { + FileWriter *file_writer = reinterpret_cast(raw_pointer); + return file_writer->write(&new_char, 1); + } }; +// The interface for using our internal file implementation. +template <> +LIBC_INLINE +FileWriter<__llvm_libc::File>::FileWriter(__llvm_libc::File *init_file) { + file = init_file; + file->lock(); +} +template <> LIBC_INLINE FileWriter<__llvm_libc::File>::~FileWriter() { + file->unlock(); +} +template <> +LIBC_INLINE int +FileWriter<__llvm_libc::File>::write(const char *__restrict to_write, + size_t len) { + auto result = file->write_unlocked(to_write, len); + size_t written = result.value; + if (written != len || result.has_error()) + written = FILE_WRITE_ERROR; + if (file->error_unlocked()) + written = FILE_STATUS_ERROR; + return written; +} + +// The interface for using the system's file implementation. +template <> LIBC_INLINE FileWriter<::FILE>::FileWriter(::FILE *init_file) { + file = init_file; + ::flockfile(file); +} +template <> LIBC_INLINE FileWriter<::FILE>::~FileWriter() { + ::funlockfile(file); +} +template <> +LIBC_INLINE int FileWriter<::FILE>::write(const char *__restrict to_write, + size_t len) { + size_t written = ::fwrite_unlocked(to_write, 1, len, file); + if (written != len || ::ferror_unlocked(file)) + written = FILE_WRITE_ERROR; + return written; +} + } // namespace printf_core } // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/vfprintf_internal.cpp b/libc/src/stdio/printf_core/vfprintf_internal.cpp deleted file mode 100644 index b25d545e54a11..0000000000000 --- a/libc/src/stdio/printf_core/vfprintf_internal.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===-- Internal implementation of vfprintf ---------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/stdio/printf_core/vfprintf_internal.h" - -#include "src/__support/arg_list.h" -#include "src/stdio/printf_core/file_writer.h" -#include "src/stdio/printf_core/printf_main.h" -#include "src/stdio/printf_core/writer.h" - -#include - -namespace __llvm_libc { -namespace printf_core { - -int vfprintf_internal(::FILE *__restrict stream, const char *__restrict format, - internal::ArgList &args) { - FileWriter file_writer(stream); - printf_core::Writer writer(reinterpret_cast(&file_writer), - printf_core::FileWriter::write_str, - printf_core::FileWriter::write_chars, - printf_core::FileWriter::write_char); - return printf_core::printf_main(&writer, format, args); -} - -} // namespace printf_core -} // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/vfprintf_internal.h b/libc/src/stdio/printf_core/vfprintf_internal.h index b837ebba182b4..762018f0b04c4 100644 --- a/libc/src/stdio/printf_core/vfprintf_internal.h +++ b/libc/src/stdio/printf_core/vfprintf_internal.h @@ -9,15 +9,29 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_VFPRINTF_INTERNAL_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_VFPRINTF_INTERNAL_H +#include "src/__support/File/file.h" #include "src/__support/arg_list.h" +#include "src/__support/macros/attributes.h" // For LIBC_INLINE +#include "src/stdio/printf_core/file_writer.h" +#include "src/stdio/printf_core/printf_main.h" +#include "src/stdio/printf_core/writer.h" #include namespace __llvm_libc { namespace printf_core { -int vfprintf_internal(::FILE *__restrict stream, const char *__restrict format, - internal::ArgList &args); +template +LIBC_INLINE int vfprintf_internal(file_t *__restrict stream, + const char *__restrict format, + internal::ArgList &args) { + FileWriter file_writer(stream); + Writer writer(reinterpret_cast(&file_writer), + FileWriter::write_str, FileWriter::write_chars, + FileWriter::write_char); + return printf_main(&writer, format, args); +} + } // namespace printf_core } // namespace __llvm_libc diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index f7f58ec702bf2..96ab7540cedb1 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -8,6 +8,7 @@ add_startup_object( -nogpulib # Do not include any GPU vendor libraries. -nostdinc -x cuda # Use the CUDA toolchain to emit the `_start` kernel. + -fgpu-rdc # Emit relocatable device code from CUDA. --offload-device-only --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE} NO_GPU_BUNDLE # Compile this file directly without special GPU handling. @@ -15,4 +16,8 @@ add_startup_object( get_fq_target_name(crt1 fq_name) # Ensure that clang uses the correct linker for this object type. 
-target_link_libraries(${fq_name} PUBLIC "--target=${LIBC_GPU_TARGET_TRIPLE}") +target_link_libraries(${fq_name} + PUBLIC + "-march=${LIBC_GPU_TARGET_ARCHITECTURE}" + "--target=${LIBC_GPU_TARGET_TRIPLE}" +) diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index 61569423c7b55..cf4077c3d9edd 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -extern "C" __attribute__((device)) int main(int argc, char **argv); +extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp); // TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX. extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void -_start(int argc, char **argv, int *ret) { - __atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED); +_start(int argc, char **argv, char **envp, int *ret, void *in, void *out, + void *buffer) { + __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 8747f18f9045b..a4b5a9be892f1 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -134,6 +134,8 @@ add_libc_unittest( libc.src.stdio.snprintf ) +# In fullbuild mode, fprintf's tests use the internal FILE for other functions. +if(LLVM_LIBC_FULL_BUILD) add_libc_unittest( fprintf_test SUITE @@ -147,7 +149,20 @@ add_libc_unittest( libc.src.stdio.fopen libc.src.stdio.fread ) - +else() +# Else in overlay mode they use the system's FILE. +add_libc_unittest( + fprintf_test + SUITE + libc_stdio_unittests + SRCS + fprintf_test.cpp + DEPENDS + libc.src.stdio.fprintf + COMPILE_OPTIONS + -DLIBC_COPT_PRINTF_USE_SYSTEM_FILE +) +endif() add_libc_unittest( printf_test diff --git a/libc/test/src/stdio/fprintf_test.cpp b/libc/test/src/stdio/fprintf_test.cpp index 286c516fbcf96..20b3c0faed6f7 100644 --- a/libc/test/src/stdio/fprintf_test.cpp +++ b/libc/test/src/stdio/fprintf_test.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE #include "src/stdio/fclose.h" #include "src/stdio/ferror.h" #include "src/stdio/fopen.h" #include "src/stdio/fread.h" +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE #include "src/stdio/fprintf.h" @@ -17,9 +19,23 @@ #include +namespace printf_test { +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE +using __llvm_libc::fclose; +using __llvm_libc::ferror; +using __llvm_libc::fopen; +using __llvm_libc::fread; +#else // defined(LIBC_COPT_PRINTF_USE_SYSTEM_FILE) +using ::fclose; +using ::ferror; +using ::fopen; +using ::fread; +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE +} // namespace printf_test + TEST(LlvmLibcFPrintfTest, WriteToFile) { constexpr char FILENAME[] = "testdata/fprintf_output.test"; - ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ::FILE *file = printf_test::fopen(FILENAME, "w"); ASSERT_FALSE(file == nullptr); int written; @@ -37,31 +53,31 @@ TEST(LlvmLibcFPrintfTest, WriteToFile) { written = __llvm_libc::fprintf(file, format_more, short_numbers); EXPECT_EQ(written, 14); - ASSERT_EQ(0, __llvm_libc::fclose(file)); + ASSERT_EQ(0, printf_test::fclose(file)); - file = __llvm_libc::fopen(FILENAME, "r"); + file = printf_test::fopen(FILENAME, "r"); ASSERT_FALSE(file == nullptr); char data[50]; - ASSERT_EQ(__llvm_libc::fread(data, 1, sizeof(simple) - 1, file), + ASSERT_EQ(printf_test::fread(data, 1, 
sizeof(simple) - 1, file), sizeof(simple) - 1); data[sizeof(simple) - 1] = '\0'; ASSERT_STREQ(data, simple); - ASSERT_EQ(__llvm_libc::fread(data, 1, sizeof(numbers) - 1, file), + ASSERT_EQ(printf_test::fread(data, 1, sizeof(numbers) - 1, file), sizeof(numbers) - 1); data[sizeof(numbers) - 1] = '\0'; ASSERT_STREQ(data, numbers); - ASSERT_EQ(__llvm_libc::fread( + ASSERT_EQ(printf_test::fread( data, 1, sizeof(format_more) + sizeof(short_numbers) - 4, file), sizeof(format_more) + sizeof(short_numbers) - 4); data[sizeof(format_more) + sizeof(short_numbers) - 4] = '\0'; ASSERT_STREQ(data, "1234 and more\n"); - ASSERT_EQ(__llvm_libc::ferror(file), 0); + ASSERT_EQ(printf_test::ferror(file), 0); written = __llvm_libc::fprintf(file, "Writing to a read only file should fail."); EXPECT_LT(written, 0); - ASSERT_EQ(__llvm_libc::fclose(file), 0); + ASSERT_EQ(printf_test::fclose(file), 0); } diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst index aa14e6bfcd97d..8083ba337fc16 100644 --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -62,7 +62,8 @@ Deprecations and Removals includes are removed based on the language version used. Incidental transitive inclusions of the following headers have been removed: - - C++2b: ``atomic``, ``bit``, ``cstring``, ``type_traits`` + - C++2b: ``atomic``, ``bit``, ``cstdint``, ``cstdlib``, ``cstring``, ``initializer_list``, ``new``, ``stdexcept``, + ``type_traits``, ``typeinfo`` - The headers ```` and ```` have been removed, since all the contents have been implemented in namespace ``std`` for at least two releases. diff --git a/libcxx/docs/Status/Cxx2b.rst b/libcxx/docs/Status/Cxx2b.rst index 471b992fdc03b..3fbbb10f3e30b 100644 --- a/libcxx/docs/Status/Cxx2b.rst +++ b/libcxx/docs/Status/Cxx2b.rst @@ -43,6 +43,7 @@ Paper Status .. [#note-P1413R3] P1413R3: ``std::aligned_storage_t`` and ``std::aligned_union_t`` are marked deprecated, but clang doesn't issue a diagnostic for deprecated using template declarations. .. [#note-P2520R0] P2520R0: Libc++ implemented this paper as a DR in C++20 as well. + .. [#note-P2711R1] P2711R1: ``join_with_view`` hasn't been done yet since this type isn't implemented yet. .. 
_issues-status-cxx2b: diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv index e51ee27deb3c1..900130cfdd506 100644 --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -108,7 +108,7 @@ "`P0290R4 `__","LWG", "``apply()`` for ``synchronized_value``","February 2023","","","|concurrency TS|" "`P2770R0 `__","LWG", "Stashing stashing ``iterators`` for proper flattening","February 2023","","","|ranges|" "`P2164R9 `__","LWG", "``views::enumerate``","February 2023","","","|ranges|" -"`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","","","|ranges|" +"`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","|Partial| [#note-P2711R1]_","","|ranges|" "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" "`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" "`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","","","|format|" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index f8c52328ccff0..8232784cb6c7e 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -294,6 +294,7 @@ set(files __concepts/semiregular.h __concepts/swappable.h __concepts/totally_ordered.h + __condition_variable/condition_variable.h __config __coroutine/coroutine_handle.h __coroutine/coroutine_traits.h @@ -474,7 +475,10 @@ set(files __memory_resource/pool_options.h __memory_resource/synchronized_pool_resource.h __memory_resource/unsynchronized_pool_resource.h - __mutex_base + __mutex/lock_guard.h + __mutex/mutex.h + __mutex/tag_types.h + __mutex/unique_lock.h __node_handle __numeric/accumulate.h __numeric/adjacent_difference.h diff --git a/libcxx/include/__availability b/libcxx/include/__availability index c03d373cafb51..5978dabdacb5f 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -149,13 +149,6 @@ // # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch // # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore - // This controls the availability of the C++20 format library. - // The library is in development and not ABI stable yet. P2216 is - // retroactively accepted in C++20. This paper contains ABI breaking - // changes. -# define _LIBCPP_AVAILABILITY_FORMAT -// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format - // This controls whether the library claims to provide a default verbose // termination function, and consequently whether the headers will try // to use it when the mechanism isn't overriden at compile-time. 
@@ -259,10 +252,6 @@ # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore # endif -# define _LIBCPP_AVAILABILITY_FORMAT \ - __attribute__((unavailable)) -# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format - # define _LIBCPP_HAS_NO_VERBOSE_ABORT_IN_LIBRARY #else diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h index ee7cf93fc79b9..f6e0f9602fdd9 100644 --- a/libcxx/include/__chrono/formatter.h +++ b/libcxx/include/__chrono/formatter.h @@ -542,7 +542,7 @@ __format_chrono(const _Tp& __value, } // namespace __formatter template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_chrono { +struct _LIBCPP_TEMPLATE_VIS __formatter_chrono { public: _LIBCPP_HIDE_FROM_ABI constexpr auto __parse( basic_format_parse_context<_CharT>& __parse_ctx, __format_spec::__fields __fields, __format_spec::__flags __flags) @@ -582,7 +582,7 @@ struct formatter, _CharT> : public __formatter_c }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -594,7 +594,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -606,7 +606,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -618,7 +618,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -630,7 +630,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -642,7 +642,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -654,7 +654,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -666,7 +666,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -678,7 +678,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -690,7 +690,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct 
_LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -702,7 +702,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -714,7 +714,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -726,7 +726,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -738,7 +738,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -750,7 +750,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; diff --git a/libcxx/include/__chrono/ostream.h b/libcxx/include/__chrono/ostream.h index 23feb9d711303..2f34115c729a3 100644 --- a/libcxx/include/__chrono/ostream.h +++ b/libcxx/include/__chrono/ostream.h @@ -93,7 +93,7 @@ _LIBCPP_HIDE_FROM_ABI auto __units_suffix() { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const duration<_Rep, _Period>& __d) { basic_ostringstream<_CharT, _Traits> __s; __s.flags(__os.flags()); @@ -104,8 +104,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const duration<_Rep, _Period>& } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, const day& __d) { +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const day& __d) { return __os << (__d.ok() ? std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%d}"), __d) // Note this error differs from the wording of the Standard. The // Standard wording doesn't work well on AIX or Windows. There @@ -117,7 +116,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const day& __d) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month& __m) { return __os << (__m.ok() ? std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%b}"), __m) : std::format(__os.getloc(), @@ -126,14 +125,14 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const month& __m) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year& __y) { return __os << (__y.ok() ? 
std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%Y}"), __y) : std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%Y} is not a valid year"), __y)); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday& __wd) { return __os << (__wd.ok() ? std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%a}"), __wd) : std::format(__os.getloc(), // TODO FMT Standard mandated locale isn't used. @@ -142,7 +141,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday& __wd) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday_indexed& __wdi) { auto __i = __wdi.index(); return __os << (__i >= 1 && __i <= 5 @@ -154,13 +153,13 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday_indexed& __wdi) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday_last& __wdl) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}[last]"), __wdl.weekday()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_day& __md) { // TODO FMT The Standard allows 30th of February to be printed. // It would be nice to show an error message instead. @@ -168,47 +167,47 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const month_day& __md) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_day_last& __mdl) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}/last"), __mdl.month()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_weekday& __mwd) { return __os << std::format( __os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}/{:L}"), __mwd.month(), __mwd.weekday_indexed()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_weekday_last& __mwdl) { return __os << std::format( __os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}/{:L}"), __mwdl.month(), __mwdl.weekday_last()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month& __ym) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{}/{:L}"), __ym.year(), __ym.month()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_day& __ymd) { return __os << (__ymd.ok() ? 
std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%F}"), __ymd) : std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%F} is not a valid date"), __ymd)); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_day_last& __ymdl) { return __os << std::format( __os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{}/{:L}"), __ymdl.year(), __ymdl.month_day_last()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday& __ymwd) { return __os << std::format( __os.getloc(), @@ -219,7 +218,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday& __ymw } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday_last& __ymwdl) { return __os << std::format( __os.getloc(), @@ -230,7 +229,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday_last& } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const hh_mm_ss<_Duration> __hms) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%T}"), __hms); } diff --git a/libcxx/include/__condition_variable/condition_variable.h b/libcxx/include/__condition_variable/condition_variable.h new file mode 100644 index 0000000000000..926effbb23e8e --- /dev/null +++ b/libcxx/include/__condition_variable/condition_variable.h @@ -0,0 +1,243 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H +#define _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H + +#include <__chrono/steady_clock.h> +#include <__chrono/system_clock.h> +#include <__chrono/time_point.h> +#include <__config> +#include <__mutex/mutex.h> +#include <__mutex/unique_lock.h> +#include <__threading_support> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_floating_point.h> +#include <__utility/move.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#ifndef _LIBCPP_HAS_NO_THREADS + +// enum class cv_status +_LIBCPP_DECLARE_STRONG_ENUM(cv_status){no_timeout, timeout}; +_LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) + +class _LIBCPP_TYPE_VIS condition_variable { + __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; + +public: + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; + +# ifdef _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION + ~condition_variable() = default; +# else + ~condition_variable(); +# endif + + condition_variable(const condition_variable&) = delete; + condition_variable& operator=(const condition_variable&) = delete; + + void notify_one() _NOEXCEPT; + void notify_all() _NOEXCEPT; + + void wait(unique_lock& __lk) _NOEXCEPT; + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS void wait(unique_lock& __lk, _Predicate __pred); + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS cv_status + wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t); + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool + wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred); + + template + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS cv_status + wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d); + + template + bool _LIBCPP_HIDE_FROM_ABI + wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred); + + typedef __libcpp_condvar_t* native_handle_type; + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; } + +private: + void + __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; +# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) + void + __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; +# endif + template + void __do_timed_wait(unique_lock& __lk, chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; +}; +#endif // !_LIBCPP_HAS_NO_THREADS + +template +inline _LIBCPP_HIDE_FROM_ABI __enable_if_t::value, chrono::nanoseconds> +__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { + using namespace chrono; + using __ratio = ratio_divide<_Period, nano>; + using __ns_rep = nanoseconds::rep; + _Rep __result_float = __d.count() * __ratio::num / __ratio::den; + + _Rep __result_max = numeric_limits<__ns_rep>::max(); + if (__result_float >= __result_max) { + return nanoseconds::max(); + } + + _Rep __result_min = numeric_limits<__ns_rep>::min(); + if (__result_float <= __result_min) { + return nanoseconds::min(); + } + + return nanoseconds(static_cast<__ns_rep>(__result_float)); +} + +template +inline _LIBCPP_HIDE_FROM_ABI __enable_if_t::value, chrono::nanoseconds> +__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { + 
using namespace chrono; + if (__d.count() == 0) { + return nanoseconds(0); + } + + using __ratio = ratio_divide<_Period, nano>; + using __ns_rep = nanoseconds::rep; + __ns_rep __result_max = numeric_limits<__ns_rep>::max(); + if (__d.count() > 0 && __d.count() > __result_max / __ratio::num) { + return nanoseconds::max(); + } + + __ns_rep __result_min = numeric_limits<__ns_rep>::min(); + if (__d.count() < 0 && __d.count() < __result_min / __ratio::num) { + return nanoseconds::min(); + } + + __ns_rep __result = __d.count() * __ratio::num / __ratio::den; + if (__result == 0) { + return nanoseconds(1); + } + + return nanoseconds(__result); +} + +#ifndef _LIBCPP_HAS_NO_THREADS +template +void condition_variable::wait(unique_lock& __lk, _Predicate __pred) { + while (!__pred()) + wait(__lk); +} + +template +cv_status condition_variable::wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t) { + using namespace chrono; + using __clock_tp_ns = time_point<_Clock, nanoseconds>; + + typename _Clock::time_point __now = _Clock::now(); + if (__t <= __now) + return cv_status::timeout; + + __clock_tp_ns __t_ns = __clock_tp_ns(std::__safe_nanosecond_cast(__t.time_since_epoch())); + + __do_timed_wait(__lk, __t_ns); + return _Clock::now() < __t ? cv_status::no_timeout : cv_status::timeout; +} + +template +bool condition_variable::wait_until( + unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred) { + while (!__pred()) { + if (wait_until(__lk, __t) == cv_status::timeout) + return __pred(); + } + return true; +} + +template +cv_status condition_variable::wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d) { + using namespace chrono; + if (__d <= __d.zero()) + return cv_status::timeout; + using __ns_rep = nanoseconds::rep; + steady_clock::time_point __c_now = steady_clock::now(); + +# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) + using __clock_tp_ns = time_point; + __ns_rep __now_count_ns = std::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); +# else + using __clock_tp_ns = time_point; + __ns_rep __now_count_ns = std::__safe_nanosecond_cast(system_clock::now().time_since_epoch()).count(); +# endif + + __ns_rep __d_ns_count = std::__safe_nanosecond_cast(__d).count(); + + if (__now_count_ns > numeric_limits<__ns_rep>::max() - __d_ns_count) { + __do_timed_wait(__lk, __clock_tp_ns::max()); + } else { + __do_timed_wait(__lk, __clock_tp_ns(nanoseconds(__now_count_ns + __d_ns_count))); + } + + return steady_clock::now() - __c_now < __d ? 
cv_status::no_timeout : cv_status::timeout; +} + +template +inline bool +condition_variable::wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred) { + return wait_until(__lk, chrono::steady_clock::now() + __d, std::move(__pred)); +} + +# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) +inline void condition_variable::__do_timed_wait( + unique_lock& __lk, chrono::time_point __tp) _NOEXCEPT { + using namespace chrono; + if (!__lk.owns_lock()) + __throw_system_error(EPERM, "condition_variable::timed wait: mutex not locked"); + nanoseconds __d = __tp.time_since_epoch(); + timespec __ts; + seconds __s = duration_cast(__d); + using __ts_sec = decltype(__ts.tv_sec); + const __ts_sec __ts_sec_max = numeric_limits<__ts_sec>::max(); + if (__s.count() < __ts_sec_max) { + __ts.tv_sec = static_cast<__ts_sec>(__s.count()); + __ts.tv_nsec = (__d - __s).count(); + } else { + __ts.tv_sec = __ts_sec_max; + __ts.tv_nsec = giga::num - 1; + } + int __ec = pthread_cond_clockwait(&__cv_, __lk.mutex()->native_handle(), CLOCK_MONOTONIC, &__ts); + if (__ec != 0 && __ec != ETIMEDOUT) + __throw_system_error(__ec, "condition_variable timed_wait failed"); +} +# endif // _LIBCPP_HAS_COND_CLOCKWAIT + +template +inline void condition_variable::__do_timed_wait(unique_lock& __lk, + chrono::time_point<_Clock, chrono::nanoseconds> __tp) _NOEXCEPT { + wait_for(__lk, __tp - _Clock::now()); +} + +#endif // _LIBCPP_HAS_NO_THREADS + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H diff --git a/libcxx/include/__config b/libcxx/include/__config index 3d3664eb83ae7..b9076073ab250 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -847,6 +847,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD # define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit # endif +# if _LIBCPP_STD_VER >= 23 +# define _LIBCPP_EXPLICIT_SINCE_CXX23 explicit +# else +# define _LIBCPP_EXPLICIT_SINCE_CXX23 +# endif + # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr # else diff --git a/libcxx/include/__format/container_adaptor.h b/libcxx/include/__format/container_adaptor.h index 9439b10c29914..5b95f600f0cdc 100644 --- a/libcxx/include/__format/container_adaptor.h +++ b/libcxx/include/__format/container_adaptor.h @@ -37,7 +37,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // adaptor headers. To use the format functions users already include . 
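Returning briefly to the new condition_variable header a few hunks above: its __safe_nanosecond_cast helpers exist because naively converting a caller-supplied duration to nanoseconds can overflow nanoseconds::rep, so the conversion saturates at nanoseconds::max()/min() instead of wrapping. A standalone sketch of that saturating conversion; saturating_to_ns is an illustrative name, not the libc++ helper itself.

    #include <chrono>
    #include <limits>

    // Convert any duration to nanoseconds, clamping instead of overflowing.
    template <class Rep, class Period>
    std::chrono::nanoseconds saturating_to_ns(std::chrono::duration<Rep, Period> d) {
      using std::chrono::nanoseconds;
      using ns_rep = nanoseconds::rep;
      // Do the scaling in long double first so the range check itself cannot
      // overflow an integer representation.
      const long double ns =
          std::chrono::duration<long double, std::nano>(d).count();
      if (ns >= static_cast<long double>(std::numeric_limits<ns_rep>::max()))
        return nanoseconds::max();
      if (ns <= static_cast<long double>(std::numeric_limits<ns_rep>::min()))
        return nanoseconds::min();
      return std::chrono::duration_cast<nanoseconds>(d);
    }

    // Usage: saturating_to_ns(std::chrono::hours::max()) == nanoseconds::max(),
    // while small values convert exactly.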
template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_container_adaptor { +struct _LIBCPP_TEMPLATE_VIS __formatter_container_adaptor { private: using __maybe_const_container = __fmt_maybe_const; using __maybe_const_adaptor = __maybe_const, _Adaptor>; @@ -57,15 +57,15 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_container_ad }; template _Container> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; template _Container> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; #endif //_LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index a25976a0795e7..7e37dd4f0b377 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -95,7 +95,7 @@ constexpr __arg_t __get_packed_type(uint64_t __types, size_t __id) { // This function is not user obervable, so it can directly use the non-standard // types of the "variant". See __arg_t for more details. template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT decltype(auto) +_LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { case __format::__arg_t::__none: @@ -225,7 +225,7 @@ class __basic_format_arg_value { }; template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg { +class _LIBCPP_TEMPLATE_VIS basic_format_arg { public: class _LIBCPP_TEMPLATE_VIS handle; @@ -277,7 +277,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_arg<_Context>::handle { // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. 
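The comment above, and the visit_format_arg overload that follows, concern the user-facing visitation API over format arguments. For context, a small example of how calling code uses std::visit_format_arg with a generic visitor; stringify and describe_first_arg are invented names for the example.

    #include <format>
    #include <string>
    #include <type_traits>
    #include <variant>   // std::monostate

    // A visitor that renders whichever alternative the format argument holds.
    struct stringify {
      std::string operator()(std::monostate) const { return "<empty>"; }
      template <class T> std::string operator()(const T &v) const {
        if constexpr (std::is_arithmetic_v<T>)
          return std::to_string(v);
        else
          return "<non-arithmetic argument>";
      }
    };

    std::string describe_first_arg(std::format_args args) {
      // get(0) returns a basic_format_arg; visit_format_arg dispatches on the
      // alternative actually stored in it.
      return std::visit_format_arg(stringify{}, args.get(0));
    }

    // Usage:
    //   int x = 42;
    //   describe_first_arg(std::make_format_args(x));   // "42"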
template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT decltype(auto) +_LIBCPP_HIDE_FROM_ABI decltype(auto) visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { # ifndef _LIBCPP_HAS_NO_INT128 diff --git a/libcxx/include/__format/format_args.h b/libcxx/include/__format/format_args.h index 32f1de97c2d1c..c2e7c96412377 100644 --- a/libcxx/include/__format/format_args.h +++ b/libcxx/include/__format/format_args.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_args { +class _LIBCPP_TEMPLATE_VIS basic_format_args { public: _LIBCPP_HIDE_FROM_ABI basic_format_args() noexcept = default; diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index b8a9a54cf1e22..521131db84d80 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -41,7 +41,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template requires output_iterator<_OutIt, const _CharT&> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_context; +class _LIBCPP_TEMPLATE_VIS basic_format_context; #ifndef _LIBCPP_HAS_NO_LOCALIZATION /** @@ -80,7 +80,6 @@ requires output_iterator<_OutIt, const _CharT&> class // clang-format off _LIBCPP_TEMPLATE_VIS - _LIBCPP_AVAILABILITY_FORMAT _LIBCPP_PREFERRED_NAME(format_context) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wformat_context)) // clang-format on @@ -162,7 +161,7 @@ class // Here the width of an element in input is determined dynamically. // Note when the top-level element has no width the retargeting is not needed. template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT +class _LIBCPP_TEMPLATE_VIS basic_format_context::__iterator, _CharT> { public: using iterator = typename __format::__retarget_buffer<_CharT>::__iterator; diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h index 218ae5b34c173..75afd92ce0566 100644 --- a/libcxx/include/__format/format_functions.h +++ b/libcxx/include/__format/format_functions.h @@ -344,7 +344,7 @@ struct _LIBCPP_TEMPLATE_VIS basic_format_string { _Context{__types_.data(), __handles_.data(), sizeof...(_Args)}); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT constexpr basic_string_view<_CharT> get() const noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<_CharT> get() const noexcept { return __str_; } @@ -409,21 +409,21 @@ requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt // https://reviews.llvm.org/D110499#inline-1180704 // TODO FMT Evaluate whether we want to file a Clang bug report regarding this. template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, string_view __fmt, format_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, wstring_view __fmt, wformat_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args); } #endif template _OutIt, class... 
_Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt.get(), _VSTD::make_format_args(__args...)); @@ -431,14 +431,17 @@ format_to(_OutIt __out_it, format_string<_Args...> __fmt, _Args&&... __args) { #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt.get(), _VSTD::make_wformat_args(__args...)); } #endif -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. +template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(string_view __fmt, format_args __args) { string __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args); @@ -446,7 +449,10 @@ vformat(string_view __fmt, format_args __args) { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. +template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(wstring_view __fmt, wformat_args __args) { wstring __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args); @@ -455,14 +461,14 @@ vformat(wstring_view __fmt, wformat_args __args) { #endif template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(format_string<_Args...> __fmt, +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI string format(format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(__fmt.get(), _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI wstring format(wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(__fmt.get(), _VSTD::make_wformat_args(__args...)); } @@ -479,14 +485,14 @@ _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __vformat_to_n(_OutIt __out_it, } template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, __fmt.get(), _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, wformat_string<_Args...> __fmt, _Args&&... 
__args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, __fmt.get(), _VSTD::make_wformat_args(__args...)); @@ -502,14 +508,14 @@ _LIBCPP_HIDE_FROM_ABI size_t __vformatted_size(basic_string_view<_CharT> __fmt, } template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(__fmt.get(), basic_format_args{_VSTD::make_format_args(__args...)}); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(__fmt.get(), basic_format_args{_VSTD::make_wformat_args(__args...)}); } @@ -536,7 +542,7 @@ requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt } template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to( +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to( _OutIt __out_it, locale __loc, string_view __fmt, format_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, __args); @@ -544,7 +550,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt v #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to( +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to( _OutIt __out_it, locale __loc, wstring_view __fmt, wformat_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, __args); @@ -552,7 +558,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt v #endif template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt.get(), _VSTD::make_format_args(__args...)); @@ -560,14 +566,17 @@ format_to(_OutIt __out_it, locale __loc, format_string<_Args...> __fmt, _Args&&. #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt.get(), _VSTD::make_wformat_args(__args...)); } #endif -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. 
+template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(locale __loc, string_view __fmt, format_args __args) { string __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt, @@ -576,7 +585,10 @@ vformat(locale __loc, string_view __fmt, format_args __args) { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. +template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(locale __loc, wstring_view __fmt, wformat_args __args) { wstring __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt, @@ -586,7 +598,7 @@ vformat(locale __loc, wstring_view __fmt, wformat_args __args) { #endif template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(locale __loc, +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI string format(locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt.get(), @@ -595,7 +607,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string f #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI wstring format(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt.get(), _VSTD::make_wformat_args(__args...)); @@ -614,7 +626,7 @@ _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __vformat_to_n(_OutIt __out_it, } template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, _VSTD::move(__loc), __fmt.get(), @@ -623,7 +635,7 @@ format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, format #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, _VSTD::move(__loc), __fmt.get(), @@ -641,14 +653,14 @@ _LIBCPP_HIDE_FROM_ABI size_t __vformatted_size(locale __loc, basic_string_view<_ } template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(_VSTD::move(__loc), __fmt.get(), basic_format_args{_VSTD::make_format_args(__args...)}); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... 
__args) { return _VSTD::__vformatted_size(_VSTD::move(__loc), __fmt.get(), basic_format_args{_VSTD::make_wformat_args(__args...)}); } diff --git a/libcxx/include/__format/format_fwd.h b/libcxx/include/__format/format_fwd.h index d3e573f893672..120b2fc8d47de 100644 --- a/libcxx/include/__format/format_fwd.h +++ b/libcxx/include/__format/format_fwd.h @@ -23,14 +23,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg; +class _LIBCPP_TEMPLATE_VIS basic_format_arg; template requires output_iterator<_OutIt, const _CharT&> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_context; +class _LIBCPP_TEMPLATE_VIS basic_format_context; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter; +struct _LIBCPP_TEMPLATE_VIS formatter; #endif //_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/format_parse_context.h b/libcxx/include/__format/format_parse_context.h index ac2f5a843405f..79f53f77d4a05 100644 --- a/libcxx/include/__format/format_parse_context.h +++ b/libcxx/include/__format/format_parse_context.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_parse_context { +class _LIBCPP_TEMPLATE_VIS basic_format_parse_context { public: using char_type = _CharT; using const_iterator = typename basic_string_view<_CharT>::const_iterator; diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h index e2c58889c5e17..172b2d5f7b8a1 100644 --- a/libcxx/include/__format/formatter.h +++ b/libcxx/include/__format/formatter.h @@ -32,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD /// - is_copy_assignable, and /// - is_move_assignable. 
template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter { +struct _LIBCPP_TEMPLATE_VIS formatter { formatter() = delete; formatter(const formatter&) = delete; formatter& operator=(const formatter&) = delete; diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h index 84f8bcfa629bf..1fb75755fc572 100644 --- a/libcxx/include/__format/formatter_bool.h +++ b/libcxx/include/__format/formatter_bool.h @@ -36,7 +36,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter { +struct _LIBCPP_TEMPLATE_VIS formatter { public: _LIBCPP_HIDE_FROM_ABI constexpr auto parse(basic_format_parse_context<_CharT>& __parse_ctx) -> decltype(__parse_ctx.begin()) { diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h index eaac70cbe18ee..7d63c042c554f 100644 --- a/libcxx/include/__format/formatter_char.h +++ b/libcxx/include/__format/formatter_char.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_char { +struct _LIBCPP_TEMPLATE_VIS __formatter_char { public: _LIBCPP_HIDE_FROM_ABI constexpr auto parse(basic_format_parse_context<_CharT>& __parse_ctx) -> decltype(__parse_ctx.begin()) { @@ -74,14 +74,14 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_char { }; template <> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_char {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_char {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; template <> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_char { +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char { }; # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index 31cd012e484ad..cac74c1f2a62c 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -739,13 +739,13 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_floating_point { }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; #endif //_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h index f157698818ac8..0e144100da9ab 100644 --- a/libcxx/include/__format/formatter_integer.h +++ b/libcxx/include/__format/formatter_integer.h @@ -31,7 +31,7 @@ #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> - struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_integer { + struct _LIBCPP_TEMPLATE_VIS __formatter_integer { public: 
_LIBCPP_HIDE_FROM_ABI constexpr auto @@ -60,43 +60,43 @@ // Signed integral types. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_integer<_CharT> { +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> { }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_integer<_CharT> {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_integer<_CharT> {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; # ifndef _LIBCPP_HAS_NO_INT128 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<__int128_t, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<__int128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif // Unsigned integral types. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; # ifndef _LIBCPP_HAS_NO_INT128 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<__uint128_t, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<__uint128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif diff --git a/libcxx/include/__format/formatter_pointer.h b/libcxx/include/__format/formatter_pointer.h index fe1b3cb496f1a..48d8372a2341f 100644 --- a/libcxx/include/__format/formatter_pointer.h +++ b/libcxx/include/__format/formatter_pointer.h @@ -55,13 +55,13 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_pointer { // - template<> struct formatter; // - template<> struct formatter; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_pointer<_CharT> { +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> { }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; #endif 
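(Illustrative aside, not part of the patch.) The hunks above drop _LIBCPP_AVAILABILITY_FORMAT from the built-in bool/char/floating-point/integer formatter specializations, leaving the markup only where it is really needed. A small sketch exercising those specializations, assuming an ASCII execution character set for the char case:

#include <cassert>
#include <format>

int main() {
  assert(std::format("{}", true) == "true");        // bool formatter
  assert(std::format("{:d}", 'A') == "65");         // char printed as an integer
  assert(std::format("{:#x}", 255u) == "0xff");     // unsigned integral, hex with prefix
  assert(std::format("{:+.2f}", 3.14159) == "+3.14"); // floating-point
  return 0;
}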
//_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index e11708d8e28cc..c14518be495bc 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -57,7 +57,7 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_string { // Formatter const char*. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; @@ -95,7 +95,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT*, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<_CharT*, _CharT> : public formatter { using _Base = formatter; @@ -106,7 +106,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT*, _Char // Formatter char[]. template <__fmt_char_type _CharT, size_t _Size> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT[_Size], _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; @@ -117,7 +117,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT[_Size], // Formatter std::string. template <__fmt_char_type _CharT, class _Traits, class _Allocator> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; @@ -130,7 +130,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; diff --git a/libcxx/include/__format/formatter_tuple.h b/libcxx/include/__format/formatter_tuple.h index d1874ddecb723..e6831de78c227 100644 --- a/libcxx/include/__format/formatter_tuple.h +++ b/libcxx/include/__format/formatter_tuple.h @@ -39,7 +39,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 23 template <__fmt_char_type _CharT, class _Tuple, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_tuple { +struct _LIBCPP_TEMPLATE_VIS __formatter_tuple { _LIBCPP_HIDE_FROM_ABI constexpr void set_separator(basic_string_view<_CharT> __separator) noexcept { __separator_ = __separator; } @@ -164,11 +164,11 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_tuple { }; template <__fmt_char_type _CharT, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_tuple<_CharT, pair<_Args...>, _Args...> {}; template <__fmt_char_type _CharT, formattable<_CharT>... 
_Args> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_tuple<_CharT, tuple<_Args...>, _Args...> {}; #endif //_LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/range_default_formatter.h b/libcxx/include/__format/range_default_formatter.h index 7fdb254de3b88..eab2951fcf552 100644 --- a/libcxx/include/__format/range_default_formatter.h +++ b/libcxx/include/__format/range_default_formatter.h @@ -84,12 +84,12 @@ inline constexpr range_format format_kind<_Rp> = [] { }(); template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter; +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter; // Required specializations template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: using __maybe_const_r = __fmt_maybe_const<_Rp, _CharT>; range_formatter>, _CharT> __underlying_; @@ -115,7 +115,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatte }; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: using __maybe_const_map = __fmt_maybe_const<_Rp, _CharT>; using __element_type = remove_cvref_t>; @@ -143,7 +143,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatte }; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: using __maybe_const_set = __fmt_maybe_const<_Rp, _CharT>; using __element_type = remove_cvref_t>; @@ -168,14 +168,13 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatte template requires(_Kp == range_format::string || _Kp == range_format::debug_string) -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter<_Kp, _Rp, _CharT> { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter<_Kp, _Rp, _CharT> { __range_default_formatter() = delete; // TODO FMT Implement }; template requires(format_kind<_Rp> != range_format::disabled && formattable, _CharT>) -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_Rp, _CharT> - : __range_default_formatter, _Rp, _CharT> {}; +struct _LIBCPP_TEMPLATE_VIS formatter<_Rp, _CharT> : __range_default_formatter, _Rp, _CharT> {}; #endif //_LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/range_formatter.h b/libcxx/include/__format/range_formatter.h index f60151029c9c3..47323433d76fa 100644 --- a/libcxx/include/__format/range_formatter.h +++ b/libcxx/include/__format/range_formatter.h @@ -40,7 +40,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template requires same_as, _Tp> && formattable<_Tp, _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT range_formatter { +struct _LIBCPP_TEMPLATE_VIS range_formatter { _LIBCPP_HIDE_FROM_ABI constexpr void set_separator(basic_string_view<_CharT> __separator) noexcept { __separator_ = __separator; } diff --git a/libcxx/include/__mutex/lock_guard.h b/libcxx/include/__mutex/lock_guard.h new file mode 100644 index 0000000000000..c075512fb97a9 --- /dev/null +++ b/libcxx/include/__mutex/lock_guard.h @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
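(Illustrative aside, not part of the patch.) The C++23 machinery touched above, __formatter_tuple, __range_default_formatter, and range_formatter, is what backs formatting of pairs, tuples, and ranges. A brief sketch of the behaviour these specializations provide, assuming a standard library with C++23 range formatting:

#include <format>
#include <map>
#include <utility>
#include <vector>

int main() {
  std::vector<int> v{1, 2, 3};
  std::pair<int, char> p{1, 'a'};
  std::map<int, char> m{{1, 'a'}, {2, 'b'}};

  auto s1 = std::format("{}", v); // sequence range: "[1, 2, 3]"
  auto s2 = std::format("{}", p); // pair via the tuple formatter: "(1, 'a')"
  auto s3 = std::format("{}", m); // map default formatter: "{1: 'a', 2: 'b'}"
  (void)s1; (void)s2; (void)s3;
  return 0;
}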
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MUTEX_LOCK_GUARD_H +#define _LIBCPP___MUTEX_LOCK_GUARD_H + +#include <__config> +#include <__mutex/tag_types.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#ifndef _LIBCPP_HAS_NO_THREADS + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class _LIBCPP_TEMPLATE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(scoped_lockable) lock_guard { +public: + typedef _Mutex mutex_type; + +private: + mutex_type& __m_; + +public: + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI explicit lock_guard(mutex_type& __m) + _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m)) + : __m_(__m) { + __m_.lock(); + } + + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI lock_guard(mutex_type& __m, adopt_lock_t) + _LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m)) + : __m_(__m) {} + _LIBCPP_HIDE_FROM_ABI ~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) { __m_.unlock(); } + +private: + lock_guard(lock_guard const&) = delete; + lock_guard& operator=(lock_guard const&) = delete; +}; +_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(lock_guard); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_NO_THREADS + +#endif // _LIBCPP___MUTEX_LOCK_GUARD_H diff --git a/libcxx/include/__mutex/mutex.h b/libcxx/include/__mutex/mutex.h new file mode 100644 index 0000000000000..13def1e5d1535 --- /dev/null +++ b/libcxx/include/__mutex/mutex.h @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MUTEX_MUTEX_H +#define _LIBCPP___MUTEX_MUTEX_H + +#include <__config> +#include <__threading_support> +#include <__type_traits/is_nothrow_default_constructible.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#ifndef _LIBCPP_HAS_NO_THREADS + +_LIBCPP_BEGIN_NAMESPACE_STD + +class _LIBCPP_TYPE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(capability("mutex")) mutex { + __libcpp_mutex_t __m_ = _LIBCPP_MUTEX_INITIALIZER; + +public: + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR mutex() = default; + + mutex(const mutex&) = delete; + mutex& operator=(const mutex&) = delete; + +# if defined(_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION) + ~mutex() = default; +# else + ~mutex() _NOEXCEPT; +# endif + + void lock() _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability()); + bool try_lock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true)); + void unlock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()); + + typedef __libcpp_mutex_t* native_handle_type; + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; } +}; + +static_assert(is_nothrow_default_constructible::value, "the default constructor for std::mutex must be nothrow"); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_NO_THREADS + +#endif // _LIBCPP___MUTEX_MUTEX_H diff --git a/libcxx/include/__mutex/tag_types.h b/libcxx/include/__mutex/tag_types.h new file mode 100644 index 0000000000000..02cf007ae1219 --- /dev/null +++ b/libcxx/include/__mutex/tag_types.h @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// 
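(Illustrative aside, not part of the patch.) The capability/scoped_lockable/acquire/release annotations carried over into the new __mutex/mutex.h and __mutex/lock_guard.h headers are what lets Clang's -Wthread-safety analysis check users of std::mutex and std::lock_guard. A typical guarded-counter sketch:

#include <mutex>

class Counter {
  mutable std::mutex m_;
  int value_ = 0;

public:
  void increment() {
    std::lock_guard guard(m_); // CTAD; locks in the constructor, unlocks in the destructor
    ++value_;
  }

  int get() const {
    std::lock_guard<std::mutex> guard(m_);
    return value_;
  }
};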
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MUTEX_TAG_TYPES_H +#define _LIBCPP___MUTEX_TAG_TYPES_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#ifndef _LIBCPP_HAS_NO_THREADS + +_LIBCPP_BEGIN_NAMESPACE_STD + +struct _LIBCPP_TYPE_VIS defer_lock_t { + explicit defer_lock_t() = default; +}; + +struct _LIBCPP_TYPE_VIS try_to_lock_t { + explicit try_to_lock_t() = default; +}; + +struct _LIBCPP_TYPE_VIS adopt_lock_t { + explicit adopt_lock_t() = default; +}; + +# if defined(_LIBCPP_BUILDING_LIBRARY) +extern _LIBCPP_EXPORTED_FROM_ABI const defer_lock_t defer_lock; +extern _LIBCPP_EXPORTED_FROM_ABI const try_to_lock_t try_to_lock; +extern _LIBCPP_EXPORTED_FROM_ABI const adopt_lock_t adopt_lock; +# elif !defined(_LIBCPP_CXX03_LANG) +/* inline */ constexpr defer_lock_t defer_lock = defer_lock_t(); +/* inline */ constexpr try_to_lock_t try_to_lock = try_to_lock_t(); +/* inline */ constexpr adopt_lock_t adopt_lock = adopt_lock_t(); +# endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_NO_THREADS + +#endif // _LIBCPP___MUTEX_TAG_TYPES_H diff --git a/libcxx/include/__mutex/unique_lock.h b/libcxx/include/__mutex/unique_lock.h new file mode 100644 index 0000000000000..a057d1c69d3ef --- /dev/null +++ b/libcxx/include/__mutex/unique_lock.h @@ -0,0 +1,172 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MUTEX_UNIQUE_LOCK_H +#define _LIBCPP___MUTEX_UNIQUE_LOCK_H + +#include <__chrono/duration.h> +#include <__chrono/time_point.h> +#include <__config> +#include <__memory/addressof.h> +#include <__mutex/tag_types.h> +#include <__utility/swap.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#ifndef _LIBCPP_HAS_NO_THREADS + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class _LIBCPP_TEMPLATE_VIS unique_lock { +public: + typedef _Mutex mutex_type; + +private: + mutex_type* __m_; + bool __owns_; + +public: + _LIBCPP_HIDE_FROM_ABI unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {} + _LIBCPP_HIDE_FROM_ABI explicit unique_lock(mutex_type& __m) : __m_(std::addressof(__m)), __owns_(true) { + __m_->lock(); + } + + _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT + : __m_(std::addressof(__m)), + __owns_(false) {} + + _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, try_to_lock_t) + : __m_(std::addressof(__m)), __owns_(__m.try_lock()) {} + + _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, adopt_lock_t) : __m_(std::addressof(__m)), __owns_(true) {} + + template + _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t) + : __m_(std::addressof(__m)), __owns_(__m.try_lock_until(__t)) {} + + template + _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d) + : __m_(std::addressof(__m)), __owns_(__m.try_lock_for(__d)) {} + + _LIBCPP_HIDE_FROM_ABI ~unique_lock() { + if (__owns_) + __m_->unlock(); + } + + unique_lock(unique_lock const&) = delete; + unique_lock& operator=(unique_lock const&) = delete; + + _LIBCPP_HIDE_FROM_ABI unique_lock(unique_lock&& __u) _NOEXCEPT : __m_(__u.__m_), __owns_(__u.__owns_) { + __u.__m_ = nullptr; + __u.__owns_ = false; + } + + _LIBCPP_HIDE_FROM_ABI unique_lock& operator=(unique_lock&& __u) _NOEXCEPT { + if (__owns_) + __m_->unlock(); + + __m_ = __u.__m_; + __owns_ = __u.__owns_; + __u.__m_ = nullptr; + __u.__owns_ = false; + return *this; + } + + void lock(); + bool try_lock(); + + template + bool try_lock_for(const chrono::duration<_Rep, _Period>& __d); + + template + bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t); + + void unlock(); + + _LIBCPP_HIDE_FROM_ABI void swap(unique_lock& __u) _NOEXCEPT { + std::swap(__m_, __u.__m_); + std::swap(__owns_, __u.__owns_); + } + + _LIBCPP_HIDE_FROM_ABI mutex_type* release() _NOEXCEPT { + mutex_type* __m = __m_; + __m_ = nullptr; + __owns_ = false; + return __m; + } + + _LIBCPP_HIDE_FROM_ABI bool owns_lock() const _NOEXCEPT { return __owns_; } + _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __owns_; } + _LIBCPP_HIDE_FROM_ABI mutex_type* mutex() const _NOEXCEPT { return __m_; } +}; +_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(unique_lock); + +template +void unique_lock<_Mutex>::lock() { + if (__m_ == nullptr) + __throw_system_error(EPERM, "unique_lock::lock: references null mutex"); + if (__owns_) + __throw_system_error(EDEADLK, "unique_lock::lock: already locked"); + __m_->lock(); + __owns_ = true; +} + +template +bool unique_lock<_Mutex>::try_lock() { + if (__m_ == nullptr) + __throw_system_error(EPERM, "unique_lock::try_lock: references null mutex"); + if (__owns_) + __throw_system_error(EDEADLK, "unique_lock::try_lock: already locked"); + __owns_ = __m_->try_lock(); 
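(Illustrative aside, not part of the patch.) A short sketch of the three tag-type constructors defined above, using the defer_lock/try_to_lock/adopt_lock objects from __mutex/tag_types.h via the usual <mutex> entry point:

#include <mutex>

std::mutex m;

void deferred() {
  std::unique_lock<std::mutex> lk(m, std::defer_lock); // not locked yet
  lk.lock();                                           // lock explicitly later
} // unlocked in the destructor because owns_lock() is true

void opportunistic() {
  std::unique_lock<std::mutex> lk(m, std::try_to_lock); // calls try_lock() in the ctor
  if (lk.owns_lock()) {
    // critical section
  }
}

void adopting() {
  m.lock();                                            // already locked by hand
  std::unique_lock<std::mutex> lk(m, std::adopt_lock); // take ownership without locking again
}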
+ return __owns_; +} + +template +template +bool unique_lock<_Mutex>::try_lock_for(const chrono::duration<_Rep, _Period>& __d) { + if (__m_ == nullptr) + __throw_system_error(EPERM, "unique_lock::try_lock_for: references null mutex"); + if (__owns_) + __throw_system_error(EDEADLK, "unique_lock::try_lock_for: already locked"); + __owns_ = __m_->try_lock_for(__d); + return __owns_; +} + +template +template +bool unique_lock<_Mutex>::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) { + if (__m_ == nullptr) + __throw_system_error(EPERM, "unique_lock::try_lock_until: references null mutex"); + if (__owns_) + __throw_system_error(EDEADLK, "unique_lock::try_lock_until: already locked"); + __owns_ = __m_->try_lock_until(__t); + return __owns_; +} + +template +void unique_lock<_Mutex>::unlock() { + if (!__owns_) + __throw_system_error(EPERM, "unique_lock::unlock: not locked"); + __m_->unlock(); + __owns_ = false; +} + +template +inline _LIBCPP_HIDE_FROM_ABI void swap(unique_lock<_Mutex>& __x, unique_lock<_Mutex>& __y) _NOEXCEPT { + __x.swap(__y); +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_NO_THREADS + +#endif // _LIBCPP___MUTEX_UNIQUE_LOCK_H diff --git a/libcxx/include/__mutex_base b/libcxx/include/__mutex_base deleted file mode 100644 index 191955363a2d3..0000000000000 --- a/libcxx/include/__mutex_base +++ /dev/null @@ -1,521 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___MUTEX_BASE -#define _LIBCPP___MUTEX_BASE - -#include <__chrono/duration.h> -#include <__chrono/steady_clock.h> -#include <__chrono/system_clock.h> -#include <__chrono/time_point.h> -#include <__config> -#include <__threading_support> -#include <__type_traits/is_floating_point.h> -#include <__type_traits/is_nothrow_default_constructible.h> -#include -#include -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - - -_LIBCPP_BEGIN_NAMESPACE_STD - -#ifndef _LIBCPP_HAS_NO_THREADS - -class _LIBCPP_TYPE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(capability("mutex")) mutex -{ - __libcpp_mutex_t __m_ = _LIBCPP_MUTEX_INITIALIZER; - -public: - _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR mutex() = default; - - mutex(const mutex&) = delete; - mutex& operator=(const mutex&) = delete; - -#if defined(_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION) - ~mutex() = default; -#else - ~mutex() _NOEXCEPT; -#endif - - void lock() _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability()); - bool try_lock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true)); - void unlock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()); - - typedef __libcpp_mutex_t* native_handle_type; - _LIBCPP_INLINE_VISIBILITY native_handle_type native_handle() {return &__m_;} -}; - -static_assert(is_nothrow_default_constructible::value, - "the default constructor for std::mutex must be nothrow"); - -struct _LIBCPP_TYPE_VIS defer_lock_t { explicit defer_lock_t() = default; }; -struct _LIBCPP_TYPE_VIS try_to_lock_t { explicit try_to_lock_t() = default; }; -struct _LIBCPP_TYPE_VIS adopt_lock_t { explicit adopt_lock_t() = default; }; - -# if 
defined(_LIBCPP_BUILDING_LIBRARY) -extern _LIBCPP_EXPORTED_FROM_ABI const defer_lock_t defer_lock; -extern _LIBCPP_EXPORTED_FROM_ABI const try_to_lock_t try_to_lock; -extern _LIBCPP_EXPORTED_FROM_ABI const adopt_lock_t adopt_lock; -# elif !defined(_LIBCPP_CXX03_LANG) -/* inline */ constexpr defer_lock_t defer_lock = defer_lock_t(); -/* inline */ constexpr try_to_lock_t try_to_lock = try_to_lock_t(); -/* inline */ constexpr adopt_lock_t adopt_lock = adopt_lock_t(); -# endif - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(scoped_lockable) -lock_guard -{ -public: - typedef _Mutex mutex_type; - -private: - mutex_type& __m_; -public: - - _LIBCPP_NODISCARD_EXT _LIBCPP_INLINE_VISIBILITY - explicit lock_guard(mutex_type& __m) _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m)) - : __m_(__m) {__m_.lock();} - - _LIBCPP_NODISCARD_EXT _LIBCPP_INLINE_VISIBILITY - lock_guard(mutex_type& __m, adopt_lock_t) _LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m)) - : __m_(__m) {} - _LIBCPP_INLINE_VISIBILITY - ~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) {__m_.unlock();} - -private: - lock_guard(lock_guard const&) = delete; - lock_guard& operator=(lock_guard const&) = delete; -}; -_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(lock_guard); - -template -class _LIBCPP_TEMPLATE_VIS unique_lock -{ -public: - typedef _Mutex mutex_type; - -private: - mutex_type* __m_; - bool __owns_; - -public: - _LIBCPP_INLINE_VISIBILITY - unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {} - _LIBCPP_INLINE_VISIBILITY - explicit unique_lock(mutex_type& __m) - : __m_(_VSTD::addressof(__m)), __owns_(true) {__m_->lock();} - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT - : __m_(_VSTD::addressof(__m)), __owns_(false) {} - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, try_to_lock_t) - : __m_(_VSTD::addressof(__m)), __owns_(__m.try_lock()) {} - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, adopt_lock_t) - : __m_(_VSTD::addressof(__m)), __owns_(true) {} - template - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t) - : __m_(_VSTD::addressof(__m)), __owns_(__m.try_lock_until(__t)) {} - template - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d) - : __m_(_VSTD::addressof(__m)), __owns_(__m.try_lock_for(__d)) {} - _LIBCPP_INLINE_VISIBILITY - ~unique_lock() - { - if (__owns_) - __m_->unlock(); - } - - unique_lock(unique_lock const&) = delete; - unique_lock& operator=(unique_lock const&) = delete; - - _LIBCPP_INLINE_VISIBILITY - unique_lock(unique_lock&& __u) _NOEXCEPT - : __m_(__u.__m_), __owns_(__u.__owns_) - {__u.__m_ = nullptr; __u.__owns_ = false;} - _LIBCPP_INLINE_VISIBILITY - unique_lock& operator=(unique_lock&& __u) _NOEXCEPT - { - if (__owns_) - __m_->unlock(); - __m_ = __u.__m_; - __owns_ = __u.__owns_; - __u.__m_ = nullptr; - __u.__owns_ = false; - return *this; - } - - void lock(); - bool try_lock(); - - template - bool try_lock_for(const chrono::duration<_Rep, _Period>& __d); - template - bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t); - - void unlock(); - - _LIBCPP_INLINE_VISIBILITY - void swap(unique_lock& __u) _NOEXCEPT - { - _VSTD::swap(__m_, __u.__m_); - _VSTD::swap(__owns_, __u.__owns_); - } - _LIBCPP_INLINE_VISIBILITY - mutex_type* release() _NOEXCEPT - { - mutex_type* __m = __m_; - __m_ = nullptr; - __owns_ = false; - return __m; - } - - _LIBCPP_INLINE_VISIBILITY - bool owns_lock() 
const _NOEXCEPT {return __owns_;} - _LIBCPP_INLINE_VISIBILITY - explicit operator bool() const _NOEXCEPT {return __owns_;} - _LIBCPP_INLINE_VISIBILITY - mutex_type* mutex() const _NOEXCEPT {return __m_;} -}; -_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(unique_lock); - -template -void -unique_lock<_Mutex>::lock() -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::lock: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::lock: already locked"); - __m_->lock(); - __owns_ = true; -} - -template -bool -unique_lock<_Mutex>::try_lock() -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::try_lock: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::try_lock: already locked"); - __owns_ = __m_->try_lock(); - return __owns_; -} - -template -template -bool -unique_lock<_Mutex>::try_lock_for(const chrono::duration<_Rep, _Period>& __d) -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::try_lock_for: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::try_lock_for: already locked"); - __owns_ = __m_->try_lock_for(__d); - return __owns_; -} - -template -template -bool -unique_lock<_Mutex>::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::try_lock_until: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::try_lock_until: already locked"); - __owns_ = __m_->try_lock_until(__t); - return __owns_; -} - -template -void -unique_lock<_Mutex>::unlock() -{ - if (!__owns_) - __throw_system_error(EPERM, "unique_lock::unlock: not locked"); - __m_->unlock(); - __owns_ = false; -} - -template -inline _LIBCPP_INLINE_VISIBILITY -void -swap(unique_lock<_Mutex>& __x, unique_lock<_Mutex>& __y) _NOEXCEPT - {__x.swap(__y);} - -//enum class cv_status -_LIBCPP_DECLARE_STRONG_ENUM(cv_status) -{ - no_timeout, - timeout -}; -_LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) - -class _LIBCPP_TYPE_VIS condition_variable -{ - __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; -public: - _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; - -#ifdef _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION - ~condition_variable() = default; -#else - ~condition_variable(); -#endif - - condition_variable(const condition_variable&) = delete; - condition_variable& operator=(const condition_variable&) = delete; - - void notify_one() _NOEXCEPT; - void notify_all() _NOEXCEPT; - - void wait(unique_lock& __lk) _NOEXCEPT; - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - void wait(unique_lock& __lk, _Predicate __pred); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - cv_status - wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - bool - wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t, - _Predicate __pred); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - cv_status - wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d); - - template - bool - _LIBCPP_INLINE_VISIBILITY - wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d, - _Predicate __pred); - - typedef __libcpp_condvar_t* native_handle_type; - _LIBCPP_INLINE_VISIBILITY native_handle_type native_handle() {return &__cv_;} - -private: - void __do_timed_wait(unique_lock& __lk, - 
chrono::time_point) _NOEXCEPT; -#if defined(_LIBCPP_HAS_COND_CLOCKWAIT) - void __do_timed_wait(unique_lock& __lk, - chrono::time_point) _NOEXCEPT; -#endif - template - void __do_timed_wait(unique_lock& __lk, - chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; -}; -#endif // !_LIBCPP_HAS_NO_THREADS - -template -inline _LIBCPP_INLINE_VISIBILITY -__enable_if_t::value, chrono::nanoseconds> -__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) -{ - using namespace chrono; - using __ratio = ratio_divide<_Period, nano>; - using __ns_rep = nanoseconds::rep; - _Rep __result_float = __d.count() * __ratio::num / __ratio::den; - - _Rep __result_max = numeric_limits<__ns_rep>::max(); - if (__result_float >= __result_max) { - return nanoseconds::max(); - } - - _Rep __result_min = numeric_limits<__ns_rep>::min(); - if (__result_float <= __result_min) { - return nanoseconds::min(); - } - - return nanoseconds(static_cast<__ns_rep>(__result_float)); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -__enable_if_t::value, chrono::nanoseconds> -__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) -{ - using namespace chrono; - if (__d.count() == 0) { - return nanoseconds(0); - } - - using __ratio = ratio_divide<_Period, nano>; - using __ns_rep = nanoseconds::rep; - __ns_rep __result_max = numeric_limits<__ns_rep>::max(); - if (__d.count() > 0 && __d.count() > __result_max / __ratio::num) { - return nanoseconds::max(); - } - - __ns_rep __result_min = numeric_limits<__ns_rep>::min(); - if (__d.count() < 0 && __d.count() < __result_min / __ratio::num) { - return nanoseconds::min(); - } - - __ns_rep __result = __d.count() * __ratio::num / __ratio::den; - if (__result == 0) { - return nanoseconds(1); - } - - return nanoseconds(__result); -} - -#ifndef _LIBCPP_HAS_NO_THREADS -template -void -condition_variable::wait(unique_lock& __lk, _Predicate __pred) -{ - while (!__pred()) - wait(__lk); -} - -template -cv_status -condition_variable::wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t) -{ - using namespace chrono; - using __clock_tp_ns = time_point<_Clock, nanoseconds>; - - typename _Clock::time_point __now = _Clock::now(); - if (__t <= __now) - return cv_status::timeout; - - __clock_tp_ns __t_ns = __clock_tp_ns(_VSTD::__safe_nanosecond_cast(__t.time_since_epoch())); - - __do_timed_wait(__lk, __t_ns); - return _Clock::now() < __t ? 
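(Illustrative aside, not part of the patch.) The condition_variable wait_for/wait_until overloads being moved out of __mutex_base are used in the standard producer/consumer pattern; a small sketch, with the flag and timeout chosen for illustration:

#include <chrono>
#include <condition_variable>
#include <mutex>

std::mutex m;
std::condition_variable cv;
bool ready = false;

bool wait_for_ready() {
  std::unique_lock<std::mutex> lk(m);
  // Loops internally via wait_until; returns false only if the timeout
  // elapsed with the predicate still false.
  return cv.wait_for(lk, std::chrono::milliseconds(100), [] { return ready; });
}

void signal_ready() {
  {
    std::lock_guard<std::mutex> lk(m);
    ready = true;
  }
  cv.notify_one();
}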
cv_status::no_timeout : cv_status::timeout; -} - -template -bool -condition_variable::wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t, - _Predicate __pred) -{ - while (!__pred()) - { - if (wait_until(__lk, __t) == cv_status::timeout) - return __pred(); - } - return true; -} - -template -cv_status -condition_variable::wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d) -{ - using namespace chrono; - if (__d <= __d.zero()) - return cv_status::timeout; - using __ns_rep = nanoseconds::rep; - steady_clock::time_point __c_now = steady_clock::now(); - -#if defined(_LIBCPP_HAS_COND_CLOCKWAIT) - using __clock_tp_ns = time_point; - __ns_rep __now_count_ns = _VSTD::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); -#else - using __clock_tp_ns = time_point; - __ns_rep __now_count_ns = _VSTD::__safe_nanosecond_cast(system_clock::now().time_since_epoch()).count(); -#endif - - __ns_rep __d_ns_count = _VSTD::__safe_nanosecond_cast(__d).count(); - - if (__now_count_ns > numeric_limits<__ns_rep>::max() - __d_ns_count) { - __do_timed_wait(__lk, __clock_tp_ns::max()); - } else { - __do_timed_wait(__lk, __clock_tp_ns(nanoseconds(__now_count_ns + __d_ns_count))); - } - - return steady_clock::now() - __c_now < __d ? cv_status::no_timeout : - cv_status::timeout; -} - -template -inline -bool -condition_variable::wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d, - _Predicate __pred) -{ - return wait_until(__lk, chrono::steady_clock::now() + __d, - _VSTD::move(__pred)); -} - -#if defined(_LIBCPP_HAS_COND_CLOCKWAIT) -inline -void -condition_variable::__do_timed_wait(unique_lock& __lk, - chrono::time_point __tp) _NOEXCEPT -{ - using namespace chrono; - if (!__lk.owns_lock()) - __throw_system_error(EPERM, - "condition_variable::timed wait: mutex not locked"); - nanoseconds __d = __tp.time_since_epoch(); - timespec __ts; - seconds __s = duration_cast(__d); - using __ts_sec = decltype(__ts.tv_sec); - const __ts_sec __ts_sec_max = numeric_limits<__ts_sec>::max(); - if (__s.count() < __ts_sec_max) - { - __ts.tv_sec = static_cast<__ts_sec>(__s.count()); - __ts.tv_nsec = (__d - __s).count(); - } - else - { - __ts.tv_sec = __ts_sec_max; - __ts.tv_nsec = giga::num - 1; - } - int __ec = pthread_cond_clockwait(&__cv_, __lk.mutex()->native_handle(), CLOCK_MONOTONIC, &__ts); - if (__ec != 0 && __ec != ETIMEDOUT) - __throw_system_error(__ec, "condition_variable timed_wait failed"); -} -#endif // _LIBCPP_HAS_COND_CLOCKWAIT - -template -inline -void -condition_variable::__do_timed_wait(unique_lock& __lk, - chrono::time_point<_Clock, chrono::nanoseconds> __tp) _NOEXCEPT -{ - wait_for(__lk, __tp - _Clock::now()); -} - -#endif // !_LIBCPP_HAS_NO_THREADS - -_LIBCPP_END_NAMESPACE_STD - -_LIBCPP_POP_MACROS - -#endif // _LIBCPP___MUTEX_BASE diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index b97505b578552..87668c341cb67 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -74,7 +74,7 @@ namespace ranges { drop_view() requires default_initializable<_View> = default; _LIBCPP_HIDE_FROM_ABI - constexpr drop_view(_View __base, range_difference_t<_View> __count) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_view(_View __base, range_difference_t<_View> __count) : __count_(__count) , __base_(std::move(__base)) { diff --git a/libcxx/include/__ranges/drop_while_view.h b/libcxx/include/__ranges/drop_while_view.h index 7c28992f18742..518feae4e2a98 100644 --- 
a/libcxx/include/__ranges/drop_while_view.h +++ b/libcxx/include/__ranges/drop_while_view.h @@ -51,7 +51,7 @@ class drop_while_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr drop_while_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index bf1481b7f9156..28d08c8a67e3b 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -64,10 +64,8 @@ namespace ranges { _LIBCPP_HIDE_FROM_ABI filter_view() requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI - constexpr filter_view(_View __base, _Pred __pred) - : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) - { } + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 filter_view(_View __base, _Pred __pred) + : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) {} template _LIBCPP_HIDE_FROM_ABI diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h index 67cf0b73ecd65..57139426724bd 100644 --- a/libcxx/include/__ranges/iota_view.h +++ b/libcxx/include/__ranges/iota_view.h @@ -314,7 +314,7 @@ namespace ranges { constexpr explicit iota_view(_Start __value) : __value_(std::move(__value)) { } _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(type_identity_t<_Start> __value, type_identity_t<_BoundSentinel> __bound_sentinel) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(type_identity_t<_Start> __value, type_identity_t<_BoundSentinel> __bound_sentinel) : __value_(std::move(__value)), __bound_sentinel_(std::move(__bound_sentinel)) { // Validate the precondition if possible. 
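(Illustrative aside, not part of the patch.) The _LIBCPP_EXPLICIT_SINCE_CXX23 additions implement the C++23 change that makes multi-argument view constructors explicit, so copy-initialization from a braced argument list stops compiling in C++23 while direct-initialization keeps working. A sketch of the user-visible difference:

#include <ranges>
#include <vector>

void user_visible_difference() {
  std::vector<int> v{1, 2, 3, 4};

  // OK in every mode: direct-initialization.
  std::ranges::take_view tv(v, 2);

  // OK in C++20, ill-formed in C++23 once the constructor is explicit:
  // std::ranges::take_view<std::ranges::ref_view<std::vector<int>>> tv2 = {v, 2};
  (void)tv;
}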
if constexpr (totally_ordered_with<_Start, _BoundSentinel>) { @@ -324,17 +324,17 @@ namespace ranges { } _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(__iterator __first, __iterator __last) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, __iterator __last) requires same_as<_Start, _BoundSentinel> : iota_view(std::move(__first.__value_), std::move(__last.__value_)) {} _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(__iterator __first, _BoundSentinel __last) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, _BoundSentinel __last) requires same_as<_BoundSentinel, unreachable_sentinel_t> : iota_view(std::move(__first.__value_), std::move(__last)) {} _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(__iterator __first, __sentinel __last) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, __sentinel __last) requires(!same_as<_Start, _BoundSentinel> && !same_as<_Start, unreachable_sentinel_t>) : iota_view(std::move(__first.__value_), std::move(__last.__bound_sentinel_)) {} diff --git a/libcxx/include/__ranges/lazy_split_view.h b/libcxx/include/__ranges/lazy_split_view.h index b5b0e7ef02307..186a0af320f14 100644 --- a/libcxx/include/__ranges/lazy_split_view.h +++ b/libcxx/include/__ranges/lazy_split_view.h @@ -82,14 +82,14 @@ class lazy_split_view : public view_interface> requires default_initializable<_View> && default_initializable<_Pattern> = default; _LIBCPP_HIDE_FROM_ABI - constexpr lazy_split_view(_View __base, _Pattern __pattern) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 lazy_split_view(_View __base, _Pattern __pattern) : __base_(std::move(__base)), __pattern_(std::move(__pattern)) {} template requires constructible_from<_View, views::all_t<_Range>> && constructible_from<_Pattern, single_view>> _LIBCPP_HIDE_FROM_ABI - constexpr lazy_split_view(_Range&& __r, range_value_t<_Range> __e) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 lazy_split_view(_Range&& __r, range_value_t<_Range> __e) : __base_(views::all(std::forward<_Range>(__r))) , __pattern_(views::single(std::move(__e))) {} diff --git a/libcxx/include/__ranges/split_view.h b/libcxx/include/__ranges/split_view.h index 6ebe5a43ed228..a27ac4ef7a196 100644 --- a/libcxx/include/__ranges/split_view.h +++ b/libcxx/include/__ranges/split_view.h @@ -75,13 +75,14 @@ class split_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pattern> = default; - _LIBCPP_HIDE_FROM_ABI constexpr split_view(_View __base, _Pattern __pattern) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 split_view(_View __base, _Pattern __pattern) : __base_(std::move(__base)), __pattern_(std::move((__pattern))) {} template requires constructible_from<_View, views::all_t<_Range>> && constructible_from<_Pattern, single_view>> - _LIBCPP_HIDE_FROM_ABI constexpr split_view(_Range&& __range, range_value_t<_Range> __elem) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 + split_view(_Range&& __range, range_value_t<_Range> __elem) : __base_(views::all(std::forward<_Range>(__range))), __pattern_(views::single(std::move(__elem))) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h index ec859e920ff17..111e7e5ba2516 100644 --- a/libcxx/include/__ranges/take_view.h +++ b/libcxx/include/__ranges/take_view.h @@ -67,7 +67,8 @@ class take_view : public view_interface> { _LIBCPP_HIDE_FROM_ABI take_view() requires default_initializable<_View> = default; - _LIBCPP_HIDE_FROM_ABI constexpr 
take_view(_View __base, range_difference_t<_View> __count) + _LIBCPP_HIDE_FROM_ABI + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 take_view(_View __base, range_difference_t<_View> __count) : __base_(std::move(__base)), __count_(__count) { _LIBCPP_ASSERT(__count >= 0, "count has to be greater than or equal to zero"); } diff --git a/libcxx/include/__ranges/take_while_view.h b/libcxx/include/__ranges/take_while_view.h index 77d7390dceb9c..d1f1bfe75411f 100644 --- a/libcxx/include/__ranges/take_while_view.h +++ b/libcxx/include/__ranges/take_while_view.h @@ -67,7 +67,7 @@ class take_while_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr take_while_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 take_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index a71350f0c99dc..14bd400e6d079 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -71,7 +71,7 @@ class transform_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Fn> = default; _LIBCPP_HIDE_FROM_ABI - constexpr transform_view(_View __base, _Fn __func) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 transform_view(_View __base, _Fn __func) : __func_(std::in_place, std::move(__func)), __base_(std::move(__base)) {} _LIBCPP_HIDE_FROM_ABI diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index bb7b13c6fe3eb..e1eec6066ec2e 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -107,10 +107,18 @@ public: */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__chrono/duration.h> +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> #include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> -#include <__mutex_base> +#include <__mutex/lock_guard.h> +#include <__mutex/mutex.h> +#include <__mutex/tag_types.h> +#include <__mutex/unique_lock.h> +#include <__utility/move.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -271,7 +279,14 @@ _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include +# include +# include +# include +# include +# include # include +# include #endif #endif // _LIBCPP_CONDITION_VARIABLE diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp index d85ae36953a1e..b9a670c7be50b 100644 --- a/libcxx/include/libcxx.imp +++ b/libcxx/include/libcxx.imp @@ -22,6 +22,7 @@ { include: [ "@<__chrono/.*>", "private", "", "public" ] }, { include: [ "@<__compare/.*>", "private", "", "public" ] }, { include: [ "@<__concepts/.*>", "private", "", "public" ] }, + { include: [ "@<__condition_variable/.*>", "private", "", "public" ] }, { include: [ "@<__coroutine/.*>", "private", "", "public" ] }, { include: [ "@<__debug_utils/.*>", "private", "", "public" ] }, { include: [ "@<__exception/.*>", "private", "", "public" ] }, @@ -34,6 +35,7 @@ { include: [ "@<__iterator/.*>", "private", "", "public" ] }, { include: [ "@<__memory/.*>", "private", "", "public" ] }, { include: [ "@<__memory_resource/.*>", "private", "", "public" 
] }, + { include: [ "@<__mutex/.*>", "private", "", "public" ] }, { include: [ "@<__numeric/.*>", "private", "", "public" ] }, { include: [ "@<__random/.*>", "private", "", "public" ] }, { include: [ "@<__ranges/.*>", "private", "", "public" ] }, diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 89a03cbb341d3..7bae70fafee70 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -813,6 +813,10 @@ module std [system] { module condition_variable { header "condition_variable" export * + + module __condition_variable { + module condition_variable { private header "__condition_variable/condition_variable.h" } + } } module coroutine { header "coroutine" @@ -1174,6 +1178,13 @@ module std [system] { @requires_LIBCXX_ENABLE_THREADS@ header "mutex" export * + + module __mutex { + module lock_guard { private header "__mutex/lock_guard.h" } + module mutex { private header "__mutex/mutex.h" } + module tag_types { private header "__mutex/tag_types.h" } + module unique_lock { private header "__mutex/unique_lock.h" } + } } module new { header "new" @@ -1695,7 +1706,6 @@ module std [system] { private header "__locale" export * } module __mbstate_t { private header "__mbstate_t.h" export * } - module __mutex_base { private header "__mutex_base" export * } module __node_handle { private header "__node_handle" export * } module __split_buffer { private header "__split_buffer" export * } module __std_stream { diff --git a/libcxx/include/mutex b/libcxx/include/mutex index 9d24768d6c0e2..3e89aaa4e6e6e 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -187,9 +187,15 @@ template */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> #include <__memory/shared_ptr.h> -#include <__mutex_base> +#include <__mutex/lock_guard.h> +#include <__mutex/mutex.h> +#include <__mutex/tag_types.h> +#include <__mutex/unique_lock.h> #include <__threading_support> #include <__utility/forward.h> #include @@ -706,9 +712,15 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include +# include # include # include +# include +# include +# include # include +# include #endif #endif // _LIBCPP_MUTEX diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index 550f8344ae19a..dd142f4d53600 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -124,7 +124,18 @@ template #include <__assert> // all public C++ headers provide the assertion handler #include <__availability> +#include <__chrono/duration.h> +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> +#include <__memory/addressof.h> +#include <__mutex/mutex.h> +#include <__mutex/tag_types.h> +#include <__mutex/unique_lock.h> +#include <__utility/swap.h> +#include +#include #include _LIBCPP_PUSH_MACROS @@ -133,8 +144,6 @@ _LIBCPP_PUSH_MACROS #if _LIBCPP_STD_VER >= 14 || defined(_LIBCPP_BUILDING_LIBRARY) -#include <__mutex_base> - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/thread b/libcxx/include/thread index 13e722ca9476a..19c8c2df89fd4 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -84,12 +84,16 @@ void sleep_for(const chrono::duration& rel_time); */ 
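(Illustrative aside, not part of the patch.) Because the transitive-include set changes when __mutex_base is split into the granular __mutex/ headers, code that relied on <mutex> or <thread> dragging in other headers indirectly may need to include what it uses; the leftover transitive includes are only kept for _LIBCPP_STD_VER <= 20 and can be disabled with _LIBCPP_REMOVE_TRANSITIVE_INCLUDES. A defensive sketch:

#include <cstring>           // no longer reachable through <mutex> in C++2b per the CSV changes
#include <initializer_list>  // likewise; include what you use
#include <mutex>

std::mutex m;

void zero(char* buffer, std::size_t n) {
  std::lock_guard<std::mutex> lk(m);
  std::memset(buffer, 0, n);
}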
#include <__assert> // all public C++ headers provide the assertion handler +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> #include <__exception/terminate.h> #include <__functional/hash.h> #include <__memory/addressof.h> #include <__memory/unique_ptr.h> -#include <__mutex_base> +#include <__mutex/mutex.h> +#include <__mutex/unique_lock.h> #include <__thread/poll_with_backoff.h> #include <__thread/timed_backoff_policy.h> #include <__threading_support> @@ -416,7 +420,10 @@ _LIBCPP_POP_MACROS #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include # include +# include #endif #endif // _LIBCPP_THREAD diff --git a/libcxx/include/vector b/libcxx/include/vector index 980af582558be..2361ba8e215ff 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -3328,7 +3328,7 @@ inline constexpr bool __format::__enable_insertable> = true; template // Since is-vector-bool-reference is only used once it's inlined here. requires same_as> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_Tp, CharT> { +struct _LIBCPP_TEMPLATE_VIS formatter<_Tp, CharT> { private: formatter __underlying_; diff --git a/libcxx/src/shared_mutex.cpp b/libcxx/src/shared_mutex.cpp index 73d4dc1c1c7a9..b1976c11d7ef2 100644 --- a/libcxx/src/shared_mutex.cpp +++ b/libcxx/src/shared_mutex.cpp @@ -10,6 +10,7 @@ #ifndef _LIBCPP_HAS_NO_THREADS +#include #include #if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) # pragma comment(lib, "pthread") diff --git a/libcxx/test/libcxx/algorithms/debug_less.pass.cpp b/libcxx/test/libcxx/algorithms/debug_less.pass.cpp index 4d6422a07a020..6085aa9c65d31 100644 --- a/libcxx/test/libcxx/algorithms/debug_less.pass.cpp +++ b/libcxx/test/libcxx/algorithms/debug_less.pass.cpp @@ -207,7 +207,7 @@ void test_non_const_arg_cmp() { struct ValueIterator { typedef std::input_iterator_tag iterator_category; typedef std::size_t value_type; - typedef ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef std::size_t reference; typedef std::size_t* pointer; diff --git a/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp b/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp index 8df3f8ad8cade..302948756b198 100644 --- a/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp +++ b/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp @@ -62,7 +62,7 @@ int main(int, char**) #endif test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index dd2a655642e51..263c05a8b2ce5 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -328,6 +328,7 @@ END-SCRIPT #include <__concepts/semiregular.h> // expected-error@*:* {{use of private header from outside its module: '__concepts/semiregular.h'}} #include <__concepts/swappable.h> // expected-error@*:* {{use of private header from outside its module: '__concepts/swappable.h'}} #include <__concepts/totally_ordered.h> // expected-error@*:* {{use of private header from outside its module: '__concepts/totally_ordered.h'}} +#include <__condition_variable/condition_variable.h> // expected-error@*:* {{use of private header from outside its module: '__condition_variable/condition_variable.h'}} #include <__coroutine/coroutine_handle.h> // expected-error@*:* {{use of private header from outside 
its module: '__coroutine/coroutine_handle.h'}} #include <__coroutine/coroutine_traits.h> // expected-error@*:* {{use of private header from outside its module: '__coroutine/coroutine_traits.h'}} #include <__coroutine/noop_coroutine_handle.h> // expected-error@*:* {{use of private header from outside its module: '__coroutine/noop_coroutine_handle.h'}} @@ -505,7 +506,10 @@ END-SCRIPT #include <__memory_resource/pool_options.h> // expected-error@*:* {{use of private header from outside its module: '__memory_resource/pool_options.h'}} #include <__memory_resource/synchronized_pool_resource.h> // expected-error@*:* {{use of private header from outside its module: '__memory_resource/synchronized_pool_resource.h'}} #include <__memory_resource/unsynchronized_pool_resource.h> // expected-error@*:* {{use of private header from outside its module: '__memory_resource/unsynchronized_pool_resource.h'}} -#include <__mutex_base> // expected-error@*:* {{use of private header from outside its module: '__mutex_base'}} +#include <__mutex/lock_guard.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/lock_guard.h'}} +#include <__mutex/mutex.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/mutex.h'}} +#include <__mutex/tag_types.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/tag_types.h'}} +#include <__mutex/unique_lock.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/unique_lock.h'}} #include <__node_handle> // expected-error@*:* {{use of private header from outside its module: '__node_handle'}} #include <__numeric/accumulate.h> // expected-error@*:* {{use of private header from outside its module: '__numeric/accumulate.h'}} #include <__numeric/adjacent_difference.h> // expected-error@*:* {{use of private header from outside its module: '__numeric/adjacent_difference.h'}} diff --git a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp index 374aa2fd15350..13d1bfcb88126 100644 --- a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp @@ -17,8 +17,10 @@ // typedef pthread_cond_t* native_handle_type; // native_handle_type native_handle(); -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 92a84f59268bb..e52cf25b099ae 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -734,6 +734,14 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno +shared_mutex cstddef +shared_mutex ctime +shared_mutex iosfwd +shared_mutex limits +shared_mutex ratio +shared_mutex system_error +shared_mutex type_traits shared_mutex version source_location cstdint source_location version diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 833e8ac19f8e9..3f322fefe710f 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -735,6 +735,14 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno +shared_mutex cstddef +shared_mutex ctime +shared_mutex iosfwd +shared_mutex 
limits +shared_mutex ratio +shared_mutex system_error +shared_mutex type_traits shared_mutex version source_location cstdint source_location version diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index c3c0ae6f71e8d..5503cb3ed1bf9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -737,6 +737,7 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index c3c0ae6f71e8d..5503cb3ed1bf9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -737,6 +737,7 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index a5be0f14d8589..6985245378c82 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -743,6 +743,7 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv index 8bed29fafc013..9701567c65835 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv @@ -110,11 +110,8 @@ complex version concepts cstddef concepts version condition_variable cstddef -condition_variable cstdint condition_variable cstdlib -condition_variable cstring condition_variable ctime -condition_variable initializer_list condition_variable iosfwd condition_variable limits condition_variable new @@ -403,9 +400,7 @@ memory_resource version mutex cstddef mutex cstdint mutex cstdlib -mutex cstring mutex ctime -mutex initializer_list mutex iosfwd mutex limits mutex new @@ -519,6 +514,7 @@ set optional set stdexcept set tuple set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd @@ -591,12 +587,9 @@ system_error string system_error version thread compare thread cstddef -thread cstdint -thread cstring thread ctime thread iosfwd thread limits -thread new thread ratio thread system_error thread tuple diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp index 5673c70c394b4..cea30420428cd 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp @@ -71,7 +71,7 @@ template struct Data { std::array input1; std::array input2; - ptrdiff_t expected; + std::ptrdiff_t expected; }; template diff --git a/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp index 92bef95946979..5e52cdec28caf 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp @@ -64,8 +64,8 @@ int main(int, char**) static_assert((std::is_same, 
std::atomic_char32_t>::value), ""); // Added by LWG 2441 - static_assert((std::is_same, std::atomic_intptr_t>::value), ""); - static_assert((std::is_same, std::atomic_uintptr_t>::value), ""); + static_assert((std::is_same, std::atomic_intptr_t>::value), ""); + static_assert((std::is_same, std::atomic_uintptr_t>::value), ""); static_assert((std::is_same, std::atomic_int8_t>::value), ""); static_assert((std::is_same, std::atomic_uint8_t>::value), ""); diff --git a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp index 84f49a76a810f..28125888f27df 100644 --- a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp +++ b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp @@ -200,10 +200,10 @@ void f() { static_assert(std::is_same_v, ::atomic_int_fast64_t>); static_assert(std::is_same_v, ::atomic_uint_fast64_t>); - static_assert(std::is_same_v, ::atomic_intptr_t>); - static_assert(std::is_same_v, ::atomic_uintptr_t>); + static_assert(std::is_same_v, ::atomic_intptr_t>); + static_assert(std::is_same_v, ::atomic_uintptr_t>); static_assert(std::is_same_v, ::atomic_size_t>); - static_assert(std::is_same_v, ::atomic_ptrdiff_t>); + static_assert(std::is_same_v, ::atomic_ptrdiff_t>); static_assert(std::is_same_v, ::atomic_intmax_t>); static_assert(std::is_same_v, ::atomic_uintmax_t>); diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp index e0b617071c04f..cb8dde0d513db 100644 --- a/libcxx/test/std/atomics/types.pass.cpp +++ b/libcxx/test/std/atomics/types.pass.cpp @@ -61,7 +61,7 @@ struct test_atomic A a; (void)a; #if TEST_STD_VER >= 17 static_assert((std::is_same_v), ""); - static_assert((std::is_same_v), ""); + static_assert((std::is_same_v), ""); #endif } }; @@ -146,10 +146,10 @@ int main(int, char**) test< std::int64_t> (); test (); - test (); - test (); + test (); + test (); test (); - test (); + test (); test (); test (); diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp index 67a34ef637eec..6753f614351d8 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp index a7136e7e404e8..ab9b7e1004694 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp index 8950ece9a57e8..9f978ebbbf63e 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the // following formatter specialization where adaptor-type is the name of the diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp index b620279d76079..7a5f3ead39010 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the // following formatter specialization where adaptor-type is the name of the diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp index 527c72c1e7c8d..f2ab6e5faa19d 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp @@ -19,21 +19,21 @@ #include "test_macros.h" #include "min_allocator.h" -typedef ptrdiff_t T; +typedef std::ptrdiff_t T; const T t1[] = {0, 1, 2, 3, 4, 5, 6, 7}; const T t2[] = {10, 11, 12, 13, 14, 15}; -const ptrdiff_t size_t1 = std::end(t1) - std::begin(t1); -const ptrdiff_t size_t2 = std::end(t2) - std::begin(t2); +const std::ptrdiff_t size_t1 = std::end(t1) - std::begin(t1); +const std::ptrdiff_t size_t2 = std::end(t2) - std::begin(t2); template void -testd(const C& c, ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) +testd(const C& c, std::ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) { typename C::const_iterator i = c.begin(); - ptrdiff_t n1 = 0; + std::ptrdiff_t n1 = 0; for (; n1 < p; ++n1, ++i) assert(*i == t1[n1]); - for (ptrdiff_t n2 = f; n2 < l-1; ++n2, ++i) + for (std::ptrdiff_t n2 = f; n2 < l-1; ++n2, ++i) assert(*i == t2[n2]); for (; n1 < size_t1; ++n1, ++i) assert(*i == t1[n1]); @@ -42,11 +42,11 @@ testd(const C& c, ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) template void -tests(const C& c, ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) +tests(const C& c, std::ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) { typename C::const_iterator i = c.begin(); - ptrdiff_t n = 0; - ptrdiff_t d = l > f+1 ? l-1-f : 0; + std::ptrdiff_t n = 0; + std::ptrdiff_t d = l > f+1 ? 
l-1-f : 0; if (d == 0 || p == f) { for (n = 0; n < size_t1; ++n, ++i) @@ -82,11 +82,11 @@ int main(int, char**) { // splicing different containers typedef std::forward_list C; - for (ptrdiff_t f = 0; f <= size_t2+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t2+1; ++f) { - for (ptrdiff_t l = f; l <= size_t2+1; ++l) + for (std::ptrdiff_t l = f; l <= size_t2+1; ++l) { - for (ptrdiff_t p = 0; p <= size_t1; ++p) + for (std::ptrdiff_t p = 0; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); C c2(std::begin(t2), std::end(t2)); @@ -99,11 +99,11 @@ int main(int, char**) } // splicing within same container - for (ptrdiff_t f = 0; f <= size_t1+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t1+1; ++f) { - for (ptrdiff_t l = f; l <= size_t1; ++l) + for (std::ptrdiff_t l = f; l <= size_t1; ++l) { - for (ptrdiff_t p = 0; p <= f; ++p) + for (std::ptrdiff_t p = 0; p <= f; ++p) { C c1(std::begin(t1), std::end(t1)); @@ -111,7 +111,7 @@ int main(int, char**) std::next(c1.cbefore_begin(), f), std::next(c1.cbefore_begin(), l)); tests(c1, p, f, l); } - for (ptrdiff_t p = l; p <= size_t1; ++p) + for (std::ptrdiff_t p = l; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); @@ -126,11 +126,11 @@ int main(int, char**) { // splicing different containers typedef std::forward_list> C; - for (ptrdiff_t f = 0; f <= size_t2+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t2+1; ++f) { - for (ptrdiff_t l = f; l <= size_t2+1; ++l) + for (std::ptrdiff_t l = f; l <= size_t2+1; ++l) { - for (ptrdiff_t p = 0; p <= size_t1; ++p) + for (std::ptrdiff_t p = 0; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); C c2(std::begin(t2), std::end(t2)); @@ -143,11 +143,11 @@ int main(int, char**) } // splicing within same container - for (ptrdiff_t f = 0; f <= size_t1+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t1+1; ++f) { - for (ptrdiff_t l = f; l <= size_t1; ++l) + for (std::ptrdiff_t l = f; l <= size_t1; ++l) { - for (ptrdiff_t p = 0; p <= f; ++p) + for (std::ptrdiff_t p = 0; p <= f; ++p) { C c1(std::begin(t1), std::end(t1)); @@ -155,7 +155,7 @@ int main(int, char**) std::next(c1.cbefore_begin(), f), std::next(c1.cbefore_begin(), l)); tests(c1, p, f, l); } - for (ptrdiff_t p = l; p <= size_t1; ++p) + for (std::ptrdiff_t p = l; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp index 05a0715c321ed..4f8e0337d652f 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp @@ -11,6 +11,10 @@ // TODO FMT Fix this test using GCC, it currently times out. 
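
For context on the XFAIL churn in the formatting tests below: per the TODO the patch itself adds, these tests currently pull in the floating-point std::to_chars overloads, which live in the libc++ dylib and are absent from the system dylibs shipped on macOS 10.9 through 11.0. A minimal sketch of that dependency, using only standard <charconv> and assuming a C++17-or-later toolchain (nothing in this snippet is taken from the patch):

#include <charconv>
#include <cstdio>
#include <system_error>

int main() {
  char buf[64];
  // The floating-point overload is the one these tests end up depending on;
  // whether it is usable at runtime depends on the deployed libc++ dylib.
  std::to_chars_result r = std::to_chars(buf, buf + sizeof(buf), 3.14);
  if (r.ec == std::errc())
    std::printf("%.*s\n", static_cast<int>(r.ptr - buf), buf);
  return 0;
}
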
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp index c5a623795957a..c94aedceedb89 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp index acb517a068f4c..e20ea9b33035a 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp index c099cfe1e5965..c6013ce7690dc 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp index b28e1af29a89e..05a12895e9213 100644 --- a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp +++ b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp @@ -47,7 +47,7 @@ struct identity { void compile_generator() { supported_simd128_ctor(identity()); not_supported_simd128_ctor([](int i) { return float(i); }); - not_supported_simd128_ctor([](intptr_t i) { return (int*)(i); }); + not_supported_simd128_ctor([](std::intptr_t i) { return (int*)(i); }); not_supported_simd128_ctor([](int* i) { return i; }); } diff --git a/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp b/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp index 9be44094a099c..e7531aec12b24 100644 --- a/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp +++ b/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp @@ -100,7 +100,7 @@ int main(int, char**) test_const_container ( sv ); static constexpr int arrA [] { 1, 2, 3 }; - ASSERT_SAME_TYPE(ptrdiff_t, decltype(std::ssize(arrA))); + ASSERT_SAME_TYPE(std::ptrdiff_t, decltype(std::ssize(arrA))); static_assert( std::is_signed_v, ""); test_const_array ( arrA ); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp index a9e6d17720ab3..e0f63ac594367 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp @@ -28,7 +28,7 @@ template struct MaybeNoexceptMove { int x; using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; constexpr friend value_type&& iter_move(MaybeNoexceptMove) noexcept(IsNoexcept) { return std::move(global); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp index 075930dcb0a07..aa0815390391c 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp @@ -26,7 +26,7 @@ template struct MaybeNoexceptSwap { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; constexpr friend void iter_swap(MaybeNoexceptSwap, MaybeNoexceptSwap) noexcept(IsNoexcept) { } diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp index 92e0e5cd8f9fa..8ead39231c0ba 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp @@ -49,7 +49,7 @@ struct IterBase { using iterator_category = 
std::bidirectional_iterator_tag; using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; using pointer = int*; using reference = int&; diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp index 712425a0c44ff..5e35f5c8facc4 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp @@ -55,7 +55,7 @@ constexpr bool test() { { struct ThrowingCopyNoexceptDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; ThrowingCopyNoexceptDecrement(); ThrowingCopyNoexceptDecrement(const ThrowingCopyNoexceptDecrement&); @@ -80,7 +80,7 @@ constexpr bool test() { { struct NoexceptCopyThrowingDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyThrowingDecrement(); NoexceptCopyThrowingDecrement(const NoexceptCopyThrowingDecrement&) noexcept; @@ -105,7 +105,7 @@ constexpr bool test() { { struct NoexceptCopyAndDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyAndDecrement(); NoexceptCopyAndDecrement(const NoexceptCopyAndDecrement&) noexcept; diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp index d01ee2a1b85a1..7b6fb43b0001f 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp @@ -61,7 +61,7 @@ constexpr bool test() { { struct ThrowingCopyNoexceptDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; ThrowingCopyNoexceptDecrement(); ThrowingCopyNoexceptDecrement(const ThrowingCopyNoexceptDecrement&); @@ -89,7 +89,7 @@ constexpr bool test() { { struct NoexceptCopyThrowingDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyThrowingDecrement(); NoexceptCopyThrowingDecrement(const NoexceptCopyThrowingDecrement&) noexcept; @@ -117,7 +117,7 @@ constexpr bool test() { { struct NoexceptCopyAndDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyAndDecrement(); NoexceptCopyAndDecrement(const NoexceptCopyAndDecrement&) noexcept; diff --git a/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp b/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp index aef00bbc0ddac..5e335841c2a3c 100644 --- a/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp @@ -26,5 +26,5 @@ static_assert(alignof(std::max_align_t) >= alignof(long double), ""); static_assert(alignof(std::max_align_t) >= alignof(void*), ""); #if TEST_STD_VER > 14 static_assert(alignof(std::max_align_t) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, - "max_align_t alignment should be no larger than operator 
new's alignment"); + "std::max_align_t alignment should be no larger than operator new's alignment"); #endif diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp index 1d4699c18d8ea..89e2b6bf397b0 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp @@ -45,7 +45,7 @@ int main(int, char**) static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} - static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} + static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} return 0; } diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp index 0da878d0b1f17..a4d70dc8c52c9 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp @@ -89,8 +89,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -120,7 +120,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -136,7 +136,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp index 41d5ed1e06017..0af4cddb074d1 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp @@ -86,8 +86,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -118,7 +118,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -135,7 +135,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp index 53fdec6bd2076..747b4e02bac5c 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp @@ -91,8 +91,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); 
static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -122,7 +122,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -138,7 +138,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp index 044a87fa41352..398fee8cafc1d 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp @@ -89,8 +89,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -120,7 +120,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -136,7 +136,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp index 06a8a6d6cc879..7b8f6da809b26 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp @@ -86,8 +86,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -117,7 +117,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -133,7 +133,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp index 5a30e57c409a6..4b0f8156d621b 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp @@ -85,8 +85,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -116,7 +116,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -132,7 +132,7 @@ int main(int, char**) test(); test(); 
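
A recurring change in these <bit> and iterator tests is spelling the pointer-sized and pointer-difference types with explicit std:: qualification. The C++ headers <cstdint> and <cstddef> are only required to declare these names in namespace std, so the qualified spellings are the portable ones for the test suite. A minimal illustration, assuming nothing beyond the standard headers:

#include <cstddef>
#include <cstdint>
#include <type_traits>

// <cstdint>/<cstddef> guarantee the names in namespace std; the unqualified
// spellings may or may not also be visible, depending on the implementation.
static_assert(std::is_integral<std::intptr_t>::value, "");
static_assert(std::is_integral<std::uintptr_t>::value, "");
static_assert(std::is_signed<std::ptrdiff_t>::value, "");
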
test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp index 1698e31086ea4..b88a770745d5d 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp @@ -90,8 +90,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -121,7 +121,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -137,7 +137,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp index 91fbab8bbed84..2f2f81d961ad9 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp @@ -87,8 +87,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -118,7 +118,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -134,7 +134,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp index 00e60ff588176..605b84ed35158 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp @@ -97,8 +97,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -128,7 +128,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -144,7 +144,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp index 5fe63e47403c4..2b56ae15682e0 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp @@ -85,8 +85,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + 
static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -117,7 +117,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -134,7 +134,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp index 1b63540cd8c45..fee122fe607b5 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp @@ -86,8 +86,8 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -118,7 +118,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -135,7 +135,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp index 03f3bdaf2d561..c506d0776a02c 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp @@ -137,7 +137,7 @@ int main(int, char**) #endif // int_test(); - signed_test(); + signed_test(); unsigned_test(); return 0; diff --git a/libcxx/test/std/ranges/range.access/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp index a15dc344512a1..ac2c5b7b6b764 100644 --- a/libcxx/test/std/ranges/range.access/ssize.pass.cpp +++ b/libcxx/test/std/ranges/range.access/ssize.pass.cpp @@ -71,7 +71,7 @@ constexpr bool test() { // This gets converted to ptrdiff_t because it's wider. ShortUnsignedReturnType c; assert(std::ranges::ssize(c) == 42); - ASSERT_SAME_TYPE(decltype(std::ranges::ssize(c)), ptrdiff_t); + ASSERT_SAME_TYPE(decltype(std::ranges::ssize(c)), std::ptrdiff_t); return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp index cf9f9dbca9a56..326cabd637089 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr drop_while_view(V base, Pred pred); +// constexpr drop_while_view(V base, Pred pred); // explicit since C++23 #include #include @@ -16,6 +16,8 @@ #include #include "MoveOnly.h" +#include "test_convertible.h" +#include "test_macros.h" struct View : std::ranges::view_base { MoveOnly mo; @@ -32,9 +34,23 @@ struct Pred { bool operator()(int) const; }; +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pred>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, View, Pred>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { - std::ranges::drop_while_view dwv = {View{{}, MoveOnly{5}}, Pred{}}; + std::ranges::drop_while_view dwv{View{{}, MoveOnly{5}}, Pred{}}; assert(dwv.pred().moved); assert(!dwv.pred().copied); assert(std::move(dwv).base().mo.get() == 5); @@ -45,5 +61,6 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp index 504021aa9cc48..4f4257f9102cb 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp @@ -8,13 +8,28 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr drop_view(V base, range_difference_t count); +// constexpr drop_view(V base, range_difference_t count); // explicit since C++23 #include +#include "test_convertible.h" #include "test_macros.h" #include "types.h" +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, std::ranges::range_difference_t>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, View, std::ranges::range_difference_t>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { std::ranges::drop_view dropView1(MoveOnlyView(), 4); assert(dropView1.size() == 4); diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h index b32c534a37e43..32bbddc05ed97 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h @@ -94,4 +94,15 @@ struct CountedView : std::ranges::view_base { constexpr CountedIter end() const { return CountedIter(ForwardIter(globalBuff + 8)); } }; +struct View : std::ranges::view_base { + constexpr explicit View(int* b, int* e) : begin_(b), end_(e) { } + + constexpr int* begin() const { return begin_; } + constexpr int* end() const { return end_; } + +private: + int* begin_; + int* end_; +}; + #endif // TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_DROP_TYPES_H diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp index 6756474024821..4c94cd5edf62e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp @@ -21,7 +21,7 @@ template struct ConvertibleIter : IterBase> { using iterator_category = std::random_access_iterator_tag; using value_type = std::tuple; - using difference_type = intptr_t; + using difference_type = std::intptr_t; bool movedFromOtherConst = false; int i = 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp index 55477cc997587..df95e07c97d97 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp @@ -23,7 +23,7 @@ struct Iter { 
std::tuple* it_; using value_type = std::tuple; - using difference_type = intptr_t; + using difference_type = std::intptr_t; using iterator_concept = std::input_iterator_tag; constexpr decltype(auto) operator*() const { return *it_; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/types.h b/libcxx/test/std/ranges/range.adaptors/range.elements/types.h index a1c0884b60719..f1ee165c3cc63 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/types.h @@ -93,7 +93,7 @@ template struct IterBase { using iterator_concept = std::random_access_iterator_tag; using value_type = std::tuple; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr std::tuple operator*() const { return std::tuple(5); } diff --git a/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp index 644f8deba6e85..3ccab93397147 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp @@ -8,12 +8,14 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr filter_view(View, Pred); - -#include +// constexpr filter_view(View, Pred); // explicit since C++23 #include +#include #include + +#include "test_convertible.h" +#include "test_macros.h" #include "types.h" struct Range : std::ranges::view_base { @@ -41,6 +43,20 @@ struct TrackingRange : TrackInitialization, std::ranges::view_base { int* end() const; }; +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, Range, Pred>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, Range, Pred>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { int buff[] = {1, 2, 3, 4, 5, 6, 7, 8}; @@ -57,19 +73,6 @@ constexpr bool test() { assert(it == end); } - // Test implicit syntax - { - Range range(buff, buff + 8); - Pred pred; - std::ranges::filter_view view = {range, pred}; - auto it = view.begin(), end = view.end(); - assert(*it++ == 1); - assert(*it++ == 3); - assert(*it++ == 5); - assert(*it++ == 7); - assert(it == end); - } - // Make sure we move the view { bool moved = false, copied = false; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp index 122abe6315c11..a942f43904092 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp @@ -66,7 +66,7 @@ namespace test3 { struct AlmostInputIterator { using value_type = char; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; using iterator_concept = int; constexpr const char& operator*() const; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp index ebf1356afff60..91df304b79af7 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp @@ -11,7 +11,7 @@ // template // requires constructible_from> && // constructible_from>> -// constexpr lazy_split_view(Range&& r, range_value_t e); +// constexpr 
lazy_split_view(Range&& r, range_value_t e); // explicit since C++23 #include @@ -20,6 +20,8 @@ #include #include #include + +#include "test_convertible.h" #include "types.h" struct ElementWithCounting { @@ -88,6 +90,22 @@ static_assert( std::ranges::random_access_range); static_assert( std::ranges::view); static_assert( std::is_copy_constructible_v); +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert( + !test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must be explicit"); + +#else + +static_assert( + test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { using V = std::ranges::lazy_split_view; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp index 264e883beeaea..e7bf052a7e9ee 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp @@ -8,13 +8,14 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr lazy_split_view(View base, Pattern pattern); - -#include +// constexpr lazy_split_view(View base, Pattern pattern); // explicit since C++23 #include +#include #include #include + +#include "test_convertible.h" #include "types.h" struct ViewWithCounting : std::ranges::view_base { @@ -41,9 +42,27 @@ struct ViewWithCounting : std::ranges::view_base { constexpr ViewWithCounting& operator=(ViewWithCounting&&) = default; constexpr bool operator==(const ViewWithCounting&) const { return true; } }; + static_assert(std::ranges::forward_range); static_assert(std::ranges::view); +using View = ViewWithCounting; +using Pattern = ViewWithCounting; + +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pattern>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, View, Pattern>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { // Calling the constructor with `(ForwardView, ForwardView)`. { @@ -62,9 +81,6 @@ constexpr bool test() { // Make sure the arguments are moved, not copied. { - using View = ViewWithCounting; - using Pattern = ViewWithCounting; - // Arguments are lvalues. 
{ int view_copied = 0, view_moved = 0, pattern_copied = 0, pattern_moved = 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp index 3e5671936191e..f9d2b3e7f8950 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp @@ -23,7 +23,7 @@ namespace adl { template struct MaybeNoexceptIterator { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; value_type* ptr_ = nullptr; int* iter_move_invocations_ = nullptr; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp index 7d0e8a78caedf..18fd3a31ce23c 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp @@ -24,7 +24,7 @@ namespace adl { template struct MaybeNoexceptIterator { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; value_type* ptr_ = nullptr; int* iter_swap_invocations_ = nullptr; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp index 605e3d544b2d8..bbe08befdb419 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp @@ -11,7 +11,7 @@ // template // requires constructible_from> && // constructible_from>> -// constexpr split_view(Range&& r, range_value_t e); +// constexpr split_view(Range&& r, range_value_t e); // explicit since C++23 #include #include @@ -21,6 +21,9 @@ #include #include +#include "test_convertible.h" +#include "test_macros.h" + struct Counting { int* times_copied = nullptr; int* times_moved = nullptr; @@ -68,6 +71,22 @@ static_assert(std::ranges::random_access_range); static_assert(std::ranges::view); static_assert(std::is_copy_constructible_v); +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert( + !test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must be explicit"); + +# else + +static_assert( + test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { using V = std::ranges::split_view; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp index ad206ee5ed751..963f85f8c478f 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr split_view(View base, Pattern pattern); +// constexpr split_view(View base, Pattern pattern); // explicit since C++23 #include #include @@ -16,6 +16,9 @@ #include #include +#include "test_convertible.h" +#include "test_macros.h" + struct ViewWithCounting : std::ranges::view_base { int* times_copied = nullptr; int* times_moved = nullptr; @@ -38,6 +41,23 @@ struct ViewWithCounting : std::ranges::view_base { constexpr bool operator==(const ViewWithCounting&) const { return true; } }; +using View = ViewWithCounting; +using Pattern = ViewWithCounting; + +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pattern>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, View, Pattern>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { std::string_view input = "abc def"; @@ -48,9 +68,6 @@ constexpr bool test() { // Make sure the arguments are moved, not copied. { - using View = ViewWithCounting; - using Pattern = ViewWithCounting; - // Arguments are lvalues. { int view_copied = 0, view_moved = 0, pattern_copied = 0, pattern_moved = 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/types.h b/libcxx/test/std/ranges/range.adaptors/range.split/types.h index ff2ce38317cd9..aa47faf0f468b 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.split/types.h @@ -20,7 +20,7 @@ template struct ForwardIterBase { using iterator_concept = std::forward_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int operator*() const { return 5; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp index 7adeb6713680a..469b2698c8844 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr take_while_view(V base, Pred pred); +// constexpr take_while_view(V base, Pred pred); // explicit since C++23 #include #include @@ -16,6 +16,8 @@ #include #include "MoveOnly.h" +#include "test_convertible.h" +#include "test_macros.h" struct View : std::ranges::view_base { MoveOnly mo; @@ -32,9 +34,23 @@ struct Pred { bool operator()(int) const; }; +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pred>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, View, Pred>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { - std::ranges::take_while_view twv = {View{{}, MoveOnly{5}}, Pred{}}; + std::ranges::take_while_view twv{View{{}, MoveOnly{5}}, Pred{}}; assert(twv.pred().moved); assert(!twv.pred().copied); assert(std::move(twv).base().mo.get() == 5); @@ -45,5 +61,6 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp index 3d5b835c01c27..db3e5764421af 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp @@ -26,7 +26,7 @@ struct Iter { int* it_; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; using iterator_concept = std::input_iterator_tag; constexpr decltype(auto) operator*() const { return *it_; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp index 63b936da98181..f37ffb0825ac1 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp @@ -8,16 +8,31 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr take_view(V base, range_difference_t count); +// constexpr take_view(V base, range_difference_t count); // explicit since C++23 -#include #include +#include -#include "test_macros.h" +#include "test_convertible.h" #include "test_iterators.h" +#include "test_macros.h" #include "test_range.h" #include "types.h" +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, std::ranges::range_difference_t>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, View, std::ranges::range_difference_t>(), + "This constructor must be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/types.h b/libcxx/test/std/ranges/range.adaptors/range.take/types.h index 09549a9e086f0..db80e68bb21af 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.take/types.h @@ -54,4 +54,15 @@ static_assert(std::ranges::view); static_assert(std::ranges::random_access_range); static_assert(std::ranges::sized_range); +struct View : std::ranges::view_base { + constexpr explicit View(int* b, int* e) : begin_(b), end_(e) { } + + constexpr int* begin() const { return begin_; } + constexpr int* end() const { return end_; } + +private: + int* begin_; + int* end_; +}; + #endif // TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_TAKE_TYPES_H diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp index 7ce042603694d..63a43d189256f 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp @@ -8,14 +8,16 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr transform_view(View, F); +// constexpr transform_view(View, F); // explicit since C++23 +#include #include -#include +#include "test_convertible.h" +#include "test_macros.h" struct Range : std::ranges::view_base { - constexpr explicit Range(int* b, int* e) : begin_(b), end_(e) { } + constexpr explicit Range(int* b, int* e) : begin_(b), end_(e) {} constexpr int* begin() const { return begin_; } constexpr int* end() const { return end_; } @@ -28,6 +30,20 @@ struct F { constexpr int operator()(int i) const { return i + 100; } }; +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, Range, F>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, Range, F>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { int buff[] = {1, 2, 3, 4, 5, 6, 7, 8}; @@ -41,16 +57,6 @@ constexpr bool test() { assert(view[7] == 108); } - { - Range range(buff, buff + 8); - F f; - std::ranges::transform_view view = {range, f}; - assert(view[0] == 101); - assert(view[1] == 102); - // ... 
- assert(view[7] == 108); - } - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp index ce7858f1a8921..efe64b31f79fb 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp @@ -63,7 +63,7 @@ constexpr bool test() { assert(&y1 == &(b[3])); using Iter = decltype(it1); - static_assert(canPlusEqual); + static_assert(canPlusEqual); } { @@ -83,7 +83,7 @@ constexpr bool test() { assert(&y1 == &(b[2])); using Iter = decltype(it1); - static_assert(canMinusEqual); + static_assert(canMinusEqual); } { @@ -116,12 +116,12 @@ constexpr bool test() { // One of the ranges is not random access std::ranges::zip_view v(a, b, ForwardSizedView{buffer1}); using Iter = decltype(v.begin()); - static_assert(!std::invocable, Iter, intptr_t>); - static_assert(!std::invocable, intptr_t, Iter>); - static_assert(!canPlusEqual); - static_assert(!std::invocable, Iter, intptr_t>); + static_assert(!std::invocable, Iter, std::intptr_t>); + static_assert(!std::invocable, std::intptr_t, Iter>); + static_assert(!canPlusEqual); + static_assert(!std::invocable, Iter, std::intptr_t>); static_assert(std::invocable, Iter, Iter>); - static_assert(!canMinusEqual); + static_assert(!canMinusEqual); } { diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp index 19b5b99993495..fcbff722c39b3 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp @@ -41,7 +41,7 @@ struct LessThanIterator { using iterator_category = std::random_access_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int& operator*() const { return *it_; } constexpr int& operator[](difference_type n) const { return it_[n]; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp index 8c038abdb24f5..98078b2ce3095 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp @@ -20,7 +20,7 @@ struct PODIter { using iterator_category = std::random_access_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int operator*() const { return i; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp index 6b0c086d8c4fc..c19f6c2b16524 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp @@ -144,16 +144,16 @@ void test() { { // difference_type of single view - std::ranges::zip_view v{DiffTypeRange{}}; + std::ranges::zip_view v{DiffTypeRange{}}; using Iter = decltype(v.begin()); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } { // difference_type of multiple views should be the common type - std::ranges::zip_view v{DiffTypeRange{}, DiffTypeRange{}}; + 
std::ranges::zip_view v{DiffTypeRange{}, DiffTypeRange{}}; using Iter = decltype(v.begin()); - static_assert(std::is_same_v>); + static_assert(std::is_same_v>); } const std::array foos{Foo{}}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp index 7f8b513a97717..c4c85bc24e1e8 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp @@ -34,7 +34,7 @@ struct convertible_forward_sized_iterator { using iterator_category = std::forward_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; convertible_forward_sized_iterator() = default; constexpr convertible_forward_sized_iterator(Base it) : it_(it) {} diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/types.h b/libcxx/test/std/ranges/range.adaptors/range.zip/types.h index 299ffeac0489a..fa82b836f529b 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/types.h @@ -201,7 +201,7 @@ struct forward_sized_iterator { using iterator_category = std::forward_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; using pointer = Base; using reference = decltype(*Base{}); @@ -405,7 +405,7 @@ struct iter_move_swap_iterator { using iterator_category = std::input_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int operator*() const { return i; } diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp index 0b02cadc32609..ee0e7fceffa61 100644 --- a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp @@ -8,14 +8,55 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr iota_view(iterator first, see below last); +// constexpr iota_view(iterator first, see below last); // explicit since C++23 #include #include +#include "test_convertible.h" #include "test_macros.h" #include "types.h" +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +std::ranges::iota_view view; + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must be explicit"); + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{SomeInt{0}}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must be explicit"); + +static_assert(!test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must not be explicit"); + +static_assert(test_convertible, + decltype(std::ranges::iota_view{SomeInt{0}}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must not be explicit"); + +static_assert(test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { std::ranges::iota_view commonView(SomeInt(0), SomeInt(10)); diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp index 906e0e092d2a6..7528e1ccf3ee0 100644 --- a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp @@ -14,13 +14,52 @@ TEST_CLANG_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_GCC_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_MSVC_DIAGNOSTIC_IGNORED(4018 4389) // various "signed/unsigned mismatch" -// constexpr iota_view(type_identity_t value, type_identity_t bound); +// constexpr iota_view(type_identity_t value, type_identity_t bound); // explicit since C++23 #include #include +#include "test_convertible.h" #include "types.h" +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must be explicit"); + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must be explicit"); + +static_assert(!test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must not be explicit"); + +static_assert( test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must not be explicit"); + +static_assert( test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { std::ranges::iota_view io(SomeInt(0), SomeInt(10)); diff --git a/libcxx/test/std/strings/string.view/types.pass.cpp b/libcxx/test/std/strings/string.view/types.pass.cpp index f952f20561230..25dc54d257409 100644 --- a/libcxx/test/std/strings/string.view/types.pass.cpp +++ b/libcxx/test/std/strings/string.view/types.pass.cpp @@ -47,7 +47,7 @@ test() static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp index 12c71ab05e26e..5709e9d45df97 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp index 2385156c3154b..4e6b789e45c26 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp index c5536d8f37e9c..962dcc03729f4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp @@ -22,11 +22,12 @@ // template shared_lock(shared_lock<_Mutex>) // -> shared_lock<_Mutex>; // C++17 +#include +#include +#include #include #include #include -#include -#include #include 
"make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp index fde0ed6b0b5b7..4527b6d8124bc 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp @@ -19,11 +19,13 @@ // shared_lock(mutex_type& m, try_to_lock_t); +#include +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp index 1bc131a7b968f..f7715168ee10d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp @@ -19,11 +19,13 @@ // void lock(); +#include +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp index ae387cdc8446d..0e707fcf2d50a 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp @@ -17,9 +17,10 @@ // bool try_lock(); -#include #include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp index 7f8189c298577..d28ae395ccb0d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp @@ -16,10 +16,11 @@ // template // bool try_lock_for(const chrono::duration& rel_time); -#include #include #include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp index fb4afdd4d9101..880bf1cbd4999 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp @@ -16,10 +16,11 @@ // template // bool try_lock_until(const chrono::time_point& abs_time); -#include #include #include #include +#include +#include #include "test_macros.h" diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp index bba78cf24178a..2be25748e903b 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp @@ -18,10 +18,11 @@ // template unique_lock(unique_lock<_Mutex>) // -> unique_lock<_Mutex>; // C++17 +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp index f4f344ef9b2c3..992d383dfa780 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp @@ -16,10 +16,11 @@ // unique_lock(mutex_type& m, try_to_lock_t); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp index fa43f5dd874a5..4aa6660449c99 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp @@ -16,10 +16,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp index 9249959ad005a..4cf5ec2ab5ccf 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp @@ -16,8 +16,9 @@ // bool try_lock(); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp index fe29d1625069e..8e7004e5eec85 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp @@ -16,8 +16,9 @@ // template // bool try_lock_for(const chrono::duration& rel_time); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp index a1e8553b965fe..077bc517399ab 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp @@ -16,8 +16,9 @@ // template // bool try_lock_until(const chrono::time_point& abs_time); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp index 4cd72b6bd8d2f..30c795150dace 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp @@ -14,8 +14,9 @@ // void unlock(); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp index 6f91cc3d3ab11..b3e76cf886c4d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp index 645b74bca920e..bf3cb6530b3b9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp index 51b64b9aaddbb..d9bff9b3cbda5 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp index 801e2e738c5cb..1247c1ce1ba5f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp index 24a52ae69f5af..5d20951576a82 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp @@ -20,10 +20,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp index 3f5a0642ab128..eca75f005ca55 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp @@ -20,11 +20,12 @@ // void lock_shared(); +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp index 92727eadbd9b3..bcbe7dfd78c1f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp @@ -20,10 +20,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp index ac6a95bd87a52..5a54a7ecdd1f8 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp @@ -20,11 +20,12 @@ // bool try_lock_shared(); +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp index c4836a574e9dc..ffec5056f103f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp @@ -20,12 +20,12 @@ // void lock(); -#include - #include -#include #include +#include +#include #include +#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp index 1ec0814e207ed..7187c95f2cd2d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp @@ -20,12 +20,13 @@ // void lock_shared(); -#include #include #include +#include #include #include +#include #include #include "make_test_thread.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp index 20eda45677f4e..e454ae2214bc1 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp @@ -20,10 +20,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp index d1f37a9c42df6..9597218f36ecb 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp +++ 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp @@ -20,11 +20,12 @@ // bool try_lock_shared(); +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp index a2a91bc26a7dc..a71bd3d38b2c3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp index 02d0874c08069..f3942ccb9d860 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp index 91c6f1c064324..bad5a4457e516 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp index 5915698553f5a..63be0ac713f8b 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp index 
5a3026bc351e9..ca7ad0c92e60e 100644 --- a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp @@ -21,6 +21,6 @@ int main(int, char**) static_assert(std::counting_semaphore<>::max() >= 1, ""); static_assert(std::counting_semaphore<1>::max() >= 1, ""); static_assert(std::counting_semaphore::max()>::max() >= std::numeric_limits::max(), ""); - static_assert(std::counting_semaphore::max()>::max() == std::numeric_limits::max(), ""); + static_assert(std::counting_semaphore::max()>::max() == std::numeric_limits::max(), ""); return 0; } diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp index 3b4ae203d2c3b..7a080651da393 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp @@ -13,9 +13,10 @@ // template // void sleep_until(const chrono::time_point& abs_time); -#include -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp index 0e1730447e76f..b30ed775b87f0 100644 --- a/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp @@ -15,6 +15,10 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp index 4e4feda0c8091..de25ac5ed86d0 100644 --- a/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp index 491d30bf969e2..52424227ca9af 100644 --- a/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp index 2efc023c611b6..86862cc9711f3 100644 --- a/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp @@ -15,6 +15,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp index 006df5e29e56b..7fe5611e1496a 100644 --- a/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp index 0a76fbd471d88..677219a0ee237 100644 --- a/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp index 7a59350ffea10..f52406affaead 100644 --- a/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp @@ -13,6 +13,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp index 90dca782880a5..86fbc6d7b185a 100644 --- a/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp @@ -13,6 +13,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp index b52293b16d978..59c6bafdd1353 100644 --- a/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp @@ -10,6 +10,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp index c979fc2874ca7..1ba7f6eebd011 100644 --- a/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp @@ -13,6 +13,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp index 13257ccbbb475..624bf4d4f1fd8 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp index 595e00f0008c8..64a42ff081602 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp index 9ded734a83548..254fe6a846d83 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp index 05578f6c2397b..76c74b80945b5 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp index 150813379e276..478e2b88d2a91 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index 0c2f6852ecc27..416a472de5085 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -15,6 +15,10 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 @@ -31,6 +35,7 @@ #include #include +#include #include #include "make_string.h" diff --git a/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp index 5bace05949e87..5aef2140e1f78 100644 --- a/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp @@ -16,6 +16,9 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 @@ -27,8 +30,9 @@ // basic_ostream& // operator<<(basic_ostream& os, const hh_mm_ss& hms); -#include #include +#include +#include #include #include "make_string.h" diff --git a/libcxx/test/std/time/time.syn/formatter.day.pass.cpp b/libcxx/test/std/time/time.syn/formatter.day.pass.cpp index 373db7217e14d..2329e1b6e2451 100644 --- a/libcxx/test/std/time/time.syn/formatter.day.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.day.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT Investigate Windows issues. 
// UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp index 1a2fccacb44d7..fd9d095603a81 100644 --- a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp @@ -16,6 +16,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 @@ -31,6 +34,7 @@ #include #include #include +#include #include #include "formatter_tests.h" diff --git a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp index 1bbe72a8e285e..a0f1ec0f8164b 100644 --- a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp @@ -17,6 +17,9 @@ // XFAIL: LIBCXX-FREEBSD-FIXME +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 @@ -32,6 +35,7 @@ #include #include #include +#include #include #include "formatter_tests.h" diff --git a/libcxx/test/std/time/time.syn/formatter.month.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month.pass.cpp index 324887f8af8c5..d479679f88498 100644 --- a/libcxx/test/std/time/time.syn/formatter.month.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month.pass.cpp @@ -11,6 +11,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp index f86476f716325..cce2832509394 100644 --- a/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp index 7c78271b908bb..60ddd8ba47759 100644 --- a/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp index f34b22c00cfe6..207cc09c166e3 100644 --- a/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp b/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp index 1b3fbe9f59adb..de8172e98282d 100644 --- a/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp b/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp index 54930343a8d7a..a063d23427a89 100644 --- a/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp index e3fa9ae3ba3dd..a33c57481d0a0 100644 --- a/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year.pass.cpp index beb47321e6644..7907033828cf2 100644 --- a/libcxx/test/std/time/time.syn/formatter.year.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp index 344967d41f774..d7c65bb62ad7c 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp index 25d5a5807467b..22fada55d5768 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp index 35ce599e1a08a..5ffa9e3d9cd9f 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp index 617f183882202..775fe81fea80f 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp index 50968a39dbe64..166cc616888d6 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp @@ -12,6 +12,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp index 2e602428df484..44fee37d40245 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp @@ -22,10 +22,10 @@ #include "test_macros.h" int main(int, char**) { - [[maybe_unused]] auto store = std::make_format_args(42, nullptr, false, 1.0); + [[maybe_unused]] auto store = std::make_format_args(42, nullptr, false, 'x'); LIBCPP_STATIC_ASSERT( - std::same_as>); + std::same_as>); return 0; } diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp index 14328a1425d08..feb23c503a21f 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp @@ -22,10 +22,10 @@ #include "test_macros.h" int main(int, char**) { - [[maybe_unused]] auto store = std::make_wformat_args(42, nullptr, false, 1.0); + [[maybe_unused]] auto store = std::make_wformat_args(42, nullptr, false, 'x'); LIBCPP_STATIC_ASSERT( - std::same_as>); + std::same_as>); return 0; } diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp index 29092a35d711c..c67d868dcfebe 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // template diff --git a/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp index 35bee3ecce59c..ecb055e3026bc 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // basic_format_arg get(size_t i) const noexcept; diff --git a/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp b/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp index e3d3c48995bc1..c2d3d6b2c8e95 100644 --- a/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp +++ b/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp @@ -9,9 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // class format_error; diff --git a/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp b/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp index 8f5404daaf396..d51531dacf734 100644 --- a/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp +++ b/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp @@ -33,14 +33,14 @@ void run() { (void)std::basic_format_string{"{}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{"{0:{0}P}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{"{0:{0}}"}; - (void)std::basic_format_string{"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} + (void)std::basic_format_string{"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{"{.3}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} #ifndef TEST_HAS_NO_WIDE_CHARACTERS (void)std::basic_format_string{L"foo"}; (void)std::basic_format_string{L"{}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{L"{0:{0}P}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{L"{0:{0}}"}; - (void)std::basic_format_string{L"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} + (void)std::basic_format_string{L"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{L"{.3}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} #endif } diff --git a/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp b/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp index bf7e2add0e8f5..d9a8c9f719b9a 100644 --- a/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp +++ b/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp @@ -35,10 +35,10 @@ template constexpr bool test() { assert((std::basic_format_string{CSTR("foo")}.get() == SV("foo"))); assert((std::basic_format_string{CSTR("{}")}.get() == SV("{}"))); - assert((std::basic_format_string{CSTR("{} {:01.23L}")}.get() == SV("{} {:01.23L}"))); + assert((std::basic_format_string{CSTR("{} {:*>6}")}.get() == SV("{} {:*>6}"))); // Embedded NUL character - assert((std::basic_format_string{SV("{}\0{}")}.get() == SV("{}\0{}"))); + assert((std::basic_format_string{SV("{}\0{}")}.get() == SV("{}\0{}"))); return true; } diff --git a/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp 
b/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp index 3ebd2bfc4fbd5..1ecfb5d992741 100644 --- a/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp @@ -29,12 +29,11 @@ static_assert(std::same_as, std::basic_format_string>); static_assert(std::same_as, std::basic_format_string>); -static_assert(std::same_as, std::basic_format_string>); -static_assert(std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); #ifndef TEST_HAS_NO_WIDE_CHARACTERS static_assert(std::same_as, std::basic_format_string>); static_assert(std::same_as, std::basic_format_string>); -static_assert(std::same_as, std::basic_format_string>); -static_assert( - std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); #endif diff --git a/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp index 0e4708e068ec4..54c8c1bd1f170 100644 --- a/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp @@ -8,6 +8,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// This test uses std::filesystem::path, which was introduced in macOS 10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} + // // template @@ -107,9 +110,7 @@ void test_P0645() { assert_is_formattable<__uint128_t, CharT>(); #endif - assert_is_formattable(); - assert_is_formattable(); - assert_is_formattable(); + // floating-point types are tested in concept.formattable.float.compile.pass.cpp assert_is_formattable(); assert_is_formattable(); diff --git a/libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp new file mode 100644 index 0000000000000..09b957f9d1682 --- /dev/null +++ b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: libcpp-has-no-incomplete-format + +// + +// template +// concept formattable = ... + +#include +#include + +#include "test_macros.h" + +template +void assert_is_not_formattable() { + static_assert(!std::formattable); +} + +template +void assert_is_formattable() { + // Only formatters for CharT == char || CharT == wchar_t are enabled for the + // standard formatters. When CharT is a different type the formatter should + // be disabled. 
+ if constexpr (std::same_as +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + || std::same_as +#endif + ) + static_assert(std::formattable); + else + assert_is_not_formattable(); +} + +template +void test() { + assert_is_formattable(); + assert_is_formattable(); + assert_is_formattable(); +} + +void test() { + test(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + test(); + test(); + test(); + + test(); +} diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp index dc40acecc8a19..9f701dfd015a8 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp @@ -8,6 +8,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // [format.formatter.spec]: diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp index 37d50f3d17017..83a3df3d1e447 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp @@ -59,7 +59,7 @@ void test(StringT expected, StringViewT fmt, PointerT arg) { buffer[0] = CharT('0'); buffer[1] = CharT('x'); expected.append(buffer.begin(), - std::to_chars(buffer.begin() + 2, buffer.end(), reinterpret_cast(arg), 16).ptr); + std::to_chars(buffer.begin() + 2, buffer.end(), reinterpret_cast(arg), 16).ptr); } assert(result == expected); } diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp index bc6b418862525..f106105b984cf 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp @@ -9,9 +9,6 @@ // UNSUPPORTED: no-exceptions // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // constexpr void check_arg_id(size_t id); diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp index 2d6de1f2f3354..03da8fde392bb 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp @@ -9,9 +9,6 @@ // UNSUPPORTED: no-exceptions // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // constexpr size_t next_arg_id(); diff --git a/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp b/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp index 9a5baecbb5f97..1e6893d093451 100644 --- a/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp @@ -10,6 +10,10 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Tests whether a move only type can be formatted. This is required by // P2418R2 "Add support for std::generator-like types to std::format" diff --git a/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp b/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp index 4b94ebb9ec6b6..74f20f2d797d0 100644 --- a/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp @@ -11,6 +11,10 @@ // Force unicode to be disabled. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_HAS_NO_UNICODE +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // Tests Unicode is ignored and handled as ASCII. diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp index a3184e4cba4a0..911e938887f97 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Force unicode to be disabled. 
// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_HAS_NO_UNICODE @@ -109,7 +113,7 @@ auto test_format_to_n = std::size_t n = expected.size(); std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } @@ -119,24 +123,24 @@ auto test_format_to_n = std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } #endif // TEST_HAS_NO_LOCALIZATION { - ptrdiff_t n = 0; + std::ptrdiff_t n = 0; std::basic_string out; std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out.empty()); } { - ptrdiff_t n = expected.size() / 2; + std::ptrdiff_t n = expected.size() / 2; std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected.substr(0, n)); } diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index 0cb0257b8ea57..e3ab2d16bd26f 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -15,6 +15,10 @@ // UNSUPPORTED: msvc, target={{.+}}-windows-gnu // UNSUPPORTED: LIBCXX-AIX-FIXME +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // This test the debug string type for the formatter specializations for char @@ -115,7 +119,7 @@ auto test_format_to_n = std::size_t n = expected.size(); std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } @@ -125,24 +129,24 @@ auto test_format_to_n = std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } #endif // TEST_HAS_NO_LOCALIZATION { - ptrdiff_t n = 0; + std::ptrdiff_t n = 0; std::basic_string out; std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out.empty()); } { - ptrdiff_t n = expected.size() / 2; + 
std::ptrdiff_t n = expected.size() / 2; std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected.substr(0, n)); } diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp index 61c7abd8bedc8..eb90c75da8a05 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED:gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp index de182c7cb528b..1281a0b61e2a8 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp index 6b06fcf68d845..f444c51d511b0 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Note this formatter shows additional information when tests are failing. // This aids the development. Since other formatters fail in the same fashion // they don't have this additional output. diff --git a/libcxx/test/std/utilities/format/format.functions/format.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format.verify.cpp index 23c9c2c103f1b..45829313a6267 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. 
// diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp index fd3eb06c95ffc..7079570813f20 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp index e3990603a5fc8..573257c228cc8 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp index c81a902ab62b0..cf8d55714728b 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp index a40b532dcc922..bd1fcc2b4b777 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // Basic test to validate ill-formed code is properly detected. 
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp index 8a9a5395dcc49..de32982c1562f 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp index ec4e2927c7368..b9d77de3f90f1 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp index b5effa320aab2..cfdeae9fd9fa6 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp index 7c064c584c8bb..b84615d590152 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. 
// diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp index 7d41ddb81a00f..43800b9da8a9f 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp index dc80c12141c5f..d1b92d3fa0847 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp index cd31da125c849..6b03d34d9b271 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp index 1662d893221bb..a135dd7cc9706 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. 
// diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp index d5939e255423e..9863922f9abcc 100644 --- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp @@ -12,6 +12,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.en_US.UTF-8 // diff --git a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp index db659f53cf097..efe243573f04a 100644 --- a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp @@ -15,6 +15,10 @@ // UNSUPPORTED msvc, target={{.+}}-windows-gnu // UNSUPPORTED: LIBCXX-AIX-FIXME +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // Tests the Unicode width support of the standard format specifiers. diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp index 4136123d6a9f5..7755b785518f3 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // string vformat(const locale& loc, string_view fmt, format_args args); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp index 8a95b5524fe9f..7c4c4a10ea5d8 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // string vformat(string_view fmt, format_args args); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp index 6c5e7f4ab5787..e1a740253d586 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git 
a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp index 77c783411127f..92b5409e1409e 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp index 31b7e5658b125..8f398994d251b 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp index a7a160989b336..c848f4ff2fc26 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp index 6405b5ec22e51..c17edb28f6175 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp index 21cee612bb2b0..3f91e7bc633a4 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp index d343ad1b1900b..c2d2ec2968508 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp @@ -8,6 +8,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// This test uses std::filesystem::path, which was introduced in macOS 10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} + // // template @@ -47,7 +50,7 @@ struct recursive_range { struct iterator { using iterator_concept = std::input_iterator_tag; using value_type = recursive_range; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; using reference = recursive_range; reference operator*() const; diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp index 7a0a2d18913cf..b3c8afd8bae3e 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp index 613eb5ea06392..d6b0f7e9f1c0f 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp index b459ba8ff2f2d..2275baee237cf 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp index 92763be54e15a..be117a6de2ecf 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp index 7df3284f72b71..1f6a550ee232f 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp index e411b036acbe1..e33a0bc383d14 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp index 5ca3bfe12012e..dcb3d67270019 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp index 8b25021c984d0..88940525925fa 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp index e1ab825626b56..297f7b22779f1 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp index 98bda2debb670..c28cf547da7b3 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp index 1fdc5eb726d69..faf9e1a18ee1b 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp index ce1c0c93130b6..c440b1ac2b168 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp index c399a81f2a041..0b0e3a16c4109 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp index 192ddcd16c44a..c63cc52403f45 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp index 10a330f0baf1e..52ac58a726651 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp index 75791fb945bcf..f5853f98ca97f 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp @@ -11,9 +11,9 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp index 58685f956be12..5967d8630e065 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + #include #include diff --git a/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp index 9445ddb517cad..2cca15a6d5dc8 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp @@ -10,9 +10,8 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp index 017201481fa54..2d2e60cc20dd9 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template... Ts> @@ -61,7 +57,7 @@ void test() { test(SV("(1)"), std::tuple{1}); test(SV("(1, 1)"), std::tuple{1, CharT('1')}); test(SV("(1, 1)"), std::pair{1, CharT('1')}); - test(SV("(1, 1, 1)"), std::tuple{1, CharT('1'), 1.0}); + test(SV("(1, 1, true)"), std::tuple{1, CharT('1'), true}); } void test() { diff --git a/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp index 05c90557cd54f..f38c9fad2df92 100644 --- a/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template... Ts> @@ -64,7 +60,7 @@ constexpr void test() { test>(); test>(); test>(); - test>(); + test>(); } constexpr bool test() { diff --git a/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp index 74af4f32fcf1d..63efdb077502e 100644 --- a/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template... Ts> @@ -49,7 +45,7 @@ constexpr void test() { test>(); test>(); test>(); - test>(); + test>(); } constexpr bool test() { diff --git a/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp index 0258ae215ed22..92a77b3357316 100644 --- a/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // class range_formatter @@ -49,7 +45,7 @@ constexpr void test() { test>(); test>(); test>(); - test>(); + test>(); } constexpr bool test() { diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp index c645ad8f476f1..124eb843d298e 100644 --- a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp @@ -90,7 +90,7 @@ int main(int, char**) test(); test(); - test(); + test(); test(); test(); @@ -108,7 +108,7 @@ int main(int, char**) test(); test(); - test(); + test(); #ifndef TEST_HAS_NO_INT128 test<__int128_t>(); diff --git a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp index a4f534e4b212c..3e1006ad984cd 100644 --- a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp +++ b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp @@ -40,7 +40,7 @@ int main(int, char**) { std::pair ip = std::get_temporary_buffer(5); assert(!(ip.first == nullptr) ^ (ip.second == 0)); - assert(reinterpret_cast(ip.first) % alignof(A) == 0); + assert(reinterpret_cast(ip.first) % alignof(A) == 0); std::return_temporary_buffer(ip.first); return 0; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp index e354d4a2721d5..4281cc1aa9e0d 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp @@ -123,7 +123,7 @@ struct Bar { }; void test_aligned(void* p, std::size_t align) { - assert(reinterpret_cast(p) % align == 0); + assert(reinterpret_cast(p) % align == 0); } int main(int, char**) { diff --git a/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp index a1c24b745b441..66318951a8c97 100644 --- a/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp @@ -43,8 +43,8 @@ int main(int, char**) { test_alignment_of(); test_alignment_of(); - test_alignment_of(); - test_alignment_of(); + test_alignment_of(); + test_alignment_of(); test_alignment_of(); test_alignment_of(); test_alignment_of(); diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp index 36f7745b7713f..716acbfdcebde 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp @@ -22,8 +22,8 @@ using Iterator = int*; class SizedSentinel { public: constexpr bool operator==(int*) const; - friend constexpr ptrdiff_t operator-(const SizedSentinel&, int*); - friend constexpr ptrdiff_t operator-(int*, 
const SizedSentinel&); + friend constexpr std::ptrdiff_t operator-(const SizedSentinel&, int*); + friend constexpr std::ptrdiff_t operator-(int*, const SizedSentinel&); }; static_assert(std::sized_sentinel_for); diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp index 0f5e2f0ae29ad..ee7e09ac1d655 100644 --- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp +++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp @@ -38,10 +38,10 @@ void test() { auto ptr = static_cast(allocation); std::fill(ptr, ptr + 13, '0'); assert(last_size == 13); - assert(last_alignment == alignof(max_align_t)); + assert(last_alignment == alignof(std::max_align_t)); allocator.deallocate_bytes(allocation, 13); assert(last_size == 13); - assert(last_alignment == alignof(max_align_t)); + assert(last_alignment == alignof(std::max_align_t)); } { void* allocation = allocator.allocate_bytes(13, 64); diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h index b03687447c14b..f0e19fc3d5e62 100644 --- a/libcxx/test/support/test_iterators.h +++ b/libcxx/test/support/test_iterators.h @@ -455,7 +455,7 @@ TEST_CONSTEXPR Iter base(Iter i) { return i; } template struct ThrowingIterator { typedef std::bidirectional_iterator_tag iterator_category; - typedef ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef const T value_type; typedef const T * pointer; typedef const T & reference; @@ -566,7 +566,7 @@ struct ThrowingIterator { template struct NonThrowingIterator { typedef std::bidirectional_iterator_tag iterator_category; - typedef ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef const T value_type; typedef const T * pointer; typedef const T & reference; @@ -916,7 +916,7 @@ class Iterator { public: using value_type = int; using reference = int&; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; private: value_type* ptr_ = nullptr; diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 8ff6decb0a344..6591a8edb9e16 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -492,9 +492,6 @@ apple-system-backdeployment-assertions-*) PARAMS+=";unwind_runtime_root=${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}" PARAMS+=";use_system_cxx_lib=True" PARAMS+=";enable_assertions=True" - # TODO: Enable experimental features during back-deployment -- right now some of the availability - # annotations are incorrect, leading to test failures that could be avoided. - PARAMS+=";enable_experimental=False" generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake" \ -DLIBCXX_TEST_CONFIG="apple-libc++-backdeployment.cfg.in" \ @@ -533,9 +530,6 @@ apple-system-backdeployment-*) PARAMS+=";abi_runtime_root=${OSX_ROOTS}/macOS/libc++abi/${DEPLOYMENT_TARGET}" PARAMS+=";unwind_runtime_root=${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}" PARAMS+=";use_system_cxx_lib=True" - # TODO: Enable experimental features during back-deployment -- right now some of the availability - # annotations are incorrect, leading to test failures that could be avoided. 
- PARAMS+=";enable_experimental=False" generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake" \ -DLIBCXX_TEST_CONFIG="apple-libc++-backdeployment.cfg.in" \ diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt index a6838ccb2b022..a89361cbebc6a 100644 --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -489,7 +489,6 @@ libcxx/include/__memory/uninitialized_algorithms.h libcxx/include/__memory/unique_ptr.h libcxx/include/__memory/uses_allocator.h libcxx/include/mutex -libcxx/include/__mutex_base libcxx/include/new libcxx/include/__node_handle libcxx/include/numbers diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 0909b14d81901..603703e65290b 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1953,7 +1953,8 @@ void Writer::writeSections() { // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). - if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) + if ((sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && + (ctx.config.machine == AMD64 || ctx.config.machine == I386)) memset(secBuf, 0xCC, sec->getRawSize()); parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst index c40d4b322080a..dad3177e2c7df 100644 --- a/lld/docs/WebAssembly.rst +++ b/lld/docs/WebAssembly.rst @@ -75,6 +75,11 @@ WebAssembly-specific options: flag which corresponds to ``--unresolve-symbols=ignore`` + ``--import-undefined``. +.. option:: --allow-undefined-file= + + Like ``--allow-undefined``, but the filename specified a flat list of + symbols, one per line, which are allowed to be undefined. + .. option:: --unresolved-symbols= This is a more full featured version of ``--allow-undefined``. @@ -182,11 +187,39 @@ Imports By default no undefined symbols are allowed in the final binary. The flag ``--allow-undefined`` results in a WebAssembly import being defined for each undefined symbol. It is then up to the runtime to provide such symbols. +``--allow-undefined-file`` is the same but allows a list of symbols to be +specified. Alternatively symbols can be marked in the source code as with the ``import_name`` and/or ``import_module`` clang attributes which signals that they are expected to be undefined at static link time. +Stub Libraries +~~~~~~~~~~~~~~ + +Another way to specify imports and exports is via a "stub library". This +feature is inspired by the ELF stub objects which are supported by the Solaris +linker. Stub libraries are text files that can be passed as normal linker +inputs, similar to how linker scripts can be passed to the ELF linker. The stub +library is a stand-in for a set of symbols that will be available at runtime, +but doesn't contain any actual code or data. Instead it contains just a list of +symbols, one per line. Each symbol can specify zero or more dependencies. +These dependencies are symbols that must be defined, and exported, by the output +module if the symbol is question is imported/required by the output module. + +For example, imagine the runtime provides an external symbol ``foo`` that +depends on the ``malloc`` and ``free``. This can be expressed simply as:: + + #STUB + foo: malloc,free + +Here we are saying that ``foo`` is allowed to be imported (undefined) but that +if it is imported, then the output module must also export ``malloc`` and +``free`` to the runtime. 
If ``foo`` is imported (undefined), but the output +module does not define ``malloc`` and ``free`` then the link will fail. + +Stub libraries must begin with ``#STUB`` on a line by itself. + Garbage Collection ~~~~~~~~~~~~~~~~~~ diff --git a/lld/test/COFF/arm-thumb-thunks-multipass.s b/lld/test/COFF/arm-thumb-thunks-multipass.s index 71ce53d99b31f..c10b22963187b 100644 --- a/lld/test/COFF/arm-thumb-thunks-multipass.s +++ b/lld/test/COFF/arm-thumb-thunks-multipass.s @@ -67,4 +67,4 @@ far_func\i: // FUNC01-THUNKS: 40500a: f2c0 0c10 movt r12, #16 // FUNC01-THUNKS: 40500e: 44e7 add pc, r12 // The instruction below is padding from the .balign -// FUNC01-THUNKS: 405010: cccc ldm r4!, {r2, r3, r6, r7} +// FUNC01-THUNKS: 405010: 0000 movs r0, r0 diff --git a/lld/test/COFF/arm64-import2.test b/lld/test/COFF/arm64-import2.test index 9b95f1a29b834..342671211db87 100644 --- a/lld/test/COFF/arm64-import2.test +++ b/lld/test/COFF/arm64-import2.test @@ -18,7 +18,7 @@ # AFTER: 140001000: 94000004 bl 0x140001010 # AFTER: 140001004: 94000006 bl 0x14000101c # AFTER: 140001008: d65f03c0 ret -# AFTER: 14000100c: ccccccff +# AFTER: 14000100c: 000000ff # AFTER: 140001010: b0000010 adrp x16, 0x140002000 # AFTER: 140001014: f9403210 ldr x16, [x16, #96] # AFTER: 140001018: d61f0200 br x16 diff --git a/lld/test/COFF/gaps-fill.test b/lld/test/COFF/gaps-fill.test new file mode 100644 index 0000000000000..17cd9cbc86ab3 --- /dev/null +++ b/lld/test/COFF/gaps-fill.test @@ -0,0 +1,78 @@ +# REQUIRES: aarch64 +# RUN: split-file %s %t.dir + +# RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.dir/arm64-dllmain.s -o %t.dir/arm64-dllmain.obj +# RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.dir/arm64-p4sym.s -o %t.dir/arm64-p4sym.obj +# RUN: lld-link -dll -machine:arm64 %t.dir/arm64-dllmain.obj %t.dir/arm64-p4sym.obj -out:%t.dll + +# RUN: llvm-objdump -dz %t.dll | FileCheck -check-prefix=CHECK-ARM64 %s +# CHECK-ARM64: 180001000: 52800020 mov w0, #0x1 +# CHECK-ARM64: 180001004: d65f03c0 ret +# CHECK-ARM64: 180001008: 00000000 +# CHECK-ARM64: 18000100c: 00000000 +# CHECK-ARM64: 180001010: 52800040 mov w0, #0x2 +# CHECK-ARM64: 180001014: d65f03c0 ret + +#--- arm64-dllmain.s + .def _DllMainCRTStartup; + .scl 2; + .type 32; + .endef + .globl _DllMainCRTStartup + .p2align 2 +_DllMainCRTStartup: + mov w0, #1 + ret + +#--- arm64-p4sym.s + .def p4sym; + .scl 2; + .type 32; + .endef + .globl p4sym + .p2align 4 +p4sym: + mov w0, #2 + ret + +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows %t.dir/x86_64-dllmain.s -o %t.dir/x86_64-dllmain.obj +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows %t.dir/x86_64-p4sym.s -o %t.dir/x86_64-p4sym.obj +# RUN: lld-link -dll -machine:amd64 %t.dir/x86_64-dllmain.obj %t.dir/x86_64-p4sym.obj -out:%t.dll + +# RUN: llvm-objdump -dz %t.dll | FileCheck -check-prefix=CHECK-X64 %s +# CHECK-X64: 180001000: b8 01 00 00 00 movl $0x1, %eax +# CHECK-X64: 180001005: c3 retq +# CHECK-X64: 180001006: cc int3 +# CHECK-X64: 180001007: cc int3 +# CHECK-X64: 180001008: cc int3 +# CHECK-X64: 180001009: cc int3 +# CHECK-X64: 18000100a: cc int3 +# CHECK-X64: 18000100b: cc int3 +# CHECK-X64: 18000100c: cc int3 +# CHECK-X64: 18000100d: cc int3 +# CHECK-X64: 18000100e: cc int3 +# CHECK-X64: 18000100f: cc int3 +# CHECK-X64: 180001010: b8 02 00 00 00 movl $0x2, %eax +# CHECK-X64: 180001015: c3 retq + +#--- x86_64-dllmain.s + .def _DllMainCRTStartup; + .scl 2; + .type 32; + .endef + .globl _DllMainCRTStartup + .p2align 4, 0x90 +_DllMainCRTStartup: + movl $1, %eax + retq + +#--- x86_64-p4sym.s + .def p4sym; + .scl 2; 
+ .type 32; + .endef + .globl p4sym + .p2align 4, 0x90 +p4sym: + movl $2, %eax + retq diff --git a/lld/test/wasm/Inputs/libstub-missing-dep.so b/lld/test/wasm/Inputs/libstub-missing-dep.so new file mode 100644 index 0000000000000..f2345b766f099 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub-missing-dep.so @@ -0,0 +1,2 @@ +#STUB +foo: missing_dep,missing_dep2 diff --git a/lld/test/wasm/Inputs/libstub-missing-sym.so b/lld/test/wasm/Inputs/libstub-missing-sym.so new file mode 100644 index 0000000000000..2120b948511e9 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub-missing-sym.so @@ -0,0 +1,3 @@ +#STUB +# Symbol `foo` is missing from this file which causes stub_object.s to fail +bar diff --git a/lld/test/wasm/Inputs/libstub.so b/lld/test/wasm/Inputs/libstub.so new file mode 100644 index 0000000000000..57e61f632b101 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub.so @@ -0,0 +1,5 @@ +#STUB +# This is a comment +foo: foodep1,foodep2 +# This symbols as no dependencies +bar diff --git a/lld/test/wasm/stub_library.s b/lld/test/wasm/stub_library.s new file mode 100644 index 0000000000000..9cbf2505ea9e7 --- /dev/null +++ b/lld/test/wasm/stub_library.s @@ -0,0 +1,48 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld %t.o %p/Inputs/libstub.so -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +# When the dependencies are missing the link fails +# RUN: not wasm-ld %t.o %p/Inputs/libstub-missing-dep.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING-DEP %s + +# When the dependencies are missing the link fails +# RUN: not wasm-ld %t.o %p/Inputs/libstub-missing-sym.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING-SYM %s + +# MISSING-DEP: libstub-missing-dep.so: undefined symbol: missing_dep. Required by foo +# MISSING-DEP: libstub-missing-dep.so: undefined symbol: missing_dep2. 
Required by foo + +# MISSING-SYM: undefined symbol: foo + +# The function foo is defined in libstub.so but depend on foodep1 and foodep2 +.functype foo () -> () + +.globl foodep1 +foodep1: + .functype foodep1 () -> () + end_function + +.globl foodep2 +foodep2: + .functype foodep2 () -> () + end_function + +.globl _start +_start: + .functype _start () -> () + call foo + end_function + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: memory +# CHECK-NEXT: Kind: MEMORY +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foodep1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 1 +# CHECK-NEXT: - Name: foodep2 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 2 +# CHECK-NEXT: - Name: _start +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 3 diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index e697f4b55ae6f..68cd8cabbd7f2 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -279,6 +279,12 @@ void LinkerDriver::addFile(StringRef path) { case file_magic::wasm_object: files.push_back(createObjectFile(mbref)); break; + case file_magic::unknown: + if (mbref.getBuffer().starts_with("#STUB\n")) { + files.push_back(make(mbref)); + break; + } + [[fallthrough]]; default: error("unknown file type: " + mbref.getBufferIdentifier()); } @@ -868,6 +874,53 @@ static void createOptionalSymbols() { WasmSym::tlsBase = createOptionalGlobal("__tls_base", false); } +static void processStubLibraries() { + log("-- processStubLibraries"); + for (auto &stub_file : symtab->stubFiles) { + LLVM_DEBUG(llvm::dbgs() + << "processing stub file: " << stub_file->getName() << "\n"); + for (auto [name, deps]: stub_file->symbolDependencies) { + auto* sym = symtab->find(name); + if (!sym || !sym->isUndefined() || !sym->isUsedInRegularObj || + sym->forceImport) { + LLVM_DEBUG(llvm::dbgs() << "stub not in needed: " << name << "\n"); + continue; + } + // The first stub library to define a given symbol sets this and + // definitions in later stub libraries are ignored. + sym->forceImport = true; + if (sym->traced) + message(toString(stub_file) + ": importing " + name); + else + LLVM_DEBUG(llvm::dbgs() + << toString(stub_file) << ": importing " << name << "\n"); + for (const auto dep : deps) { + auto* needed = symtab->find(dep); + if (!needed) { + error(toString(stub_file) + ": undefined symbol: " + dep + + ". Required by " + toString(*sym)); + } else if (needed->isUndefined()) { + error(toString(stub_file) + + ": undefined symbol: " + toString(*needed) + + ". Required by " + toString(*sym)); + } else { + LLVM_DEBUG(llvm::dbgs() + << "force export: " << toString(*needed) << "\n"); + needed->forceExport = true; + needed->isUsedInRegularObj = true; + if (auto *lazy = dyn_cast(needed)) { + lazy->fetch(); + if (!config->whyExtract.empty()) + config->whyExtractRecords.emplace_back(stub_file->getName(), + sym->getFile(), *sym); + } + } + } + } + } + log("-- done processStubLibraries"); +} + // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. 
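For the stub-library support added above, a minimal end-to-end sketch (file and symbol names are illustrative, not taken from this patch) looks like::

    # libruntime.so -- a text stub library passed to wasm-ld like any other input
    #STUB
    # 'foo' may remain undefined, but only if the module exports its dependencies
    foo: malloc,free

    $ wasm-ld main.o libruntime.so -o app.wasm

With these inputs, processStubLibraries() marks ``foo`` with forceImport so it becomes a WebAssembly import, and forces ``malloc`` and ``free`` to be exported (fetching them from a lazy archive member if necessary). If either dependency stays undefined, the link fails with an "undefined symbol: ... Required by foo" error, matching the checks in the new stub_library.s test.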
static std::string createResponseFile(const opt::InputArgList &args) { @@ -1166,6 +1219,8 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (errorCount()) return; + processStubLibraries(); + createOptionalSymbols(); // Resolve any variant symbols that were created due to signature @@ -1217,4 +1272,4 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { writeResult(); } -} // namespace wasm::lld +} // namespace lld::wasm diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index 75760293bbaae..2d9768c768f29 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -12,6 +12,7 @@ #include "InputElement.h" #include "OutputSegment.h" #include "SymbolTable.h" +#include "lld/Common/Args.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/Reproduce.h" #include "llvm/Object/Binary.h" @@ -678,6 +679,48 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) { llvm_unreachable("unknown symbol kind"); } + +StringRef strip(StringRef s) { + while (s.starts_with(" ")) { + s = s.drop_front(); + } + while (s.ends_with(" ")) { + s = s.drop_back(); + } + return s; +} + +void StubFile::parse() { + bool first = false; + + for (StringRef line : args::getLines(mb)) { + // File must begin with #STUB + if (first) { + assert(line == "#STUB\n"); + first = false; + } + + // Lines starting with # are considered comments + if (line.startswith("#")) + continue; + + StringRef sym; + StringRef rest; + std::tie(sym, rest) = line.split(':'); + sym = strip(sym); + rest = strip(rest); + + symbolDependencies[sym] = {}; + + while (rest.size()) { + StringRef first; + std::tie(first, rest) = rest.split(','); + first = strip(first); + symbolDependencies[sym].push_back(first); + } + } +} + void ArchiveFile::parse() { // Parse a MemoryBufferRef as an archive file. LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n"); diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h index c72f64cb2bd04..11cee5405b657 100644 --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -47,6 +47,7 @@ class InputFile { SharedKind, ArchiveKind, BitcodeKind, + StubKind, }; virtual ~InputFile() {} @@ -183,6 +184,18 @@ class BitcodeFile : public InputFile { static bool doneLTO; }; +// Stub libray (See docs/WebAssembly.rst) +class StubFile : public InputFile { +public: + explicit StubFile(MemoryBufferRef m) : InputFile(StubKind, m) {} + + static bool classof(const InputFile *f) { return f->kind() == StubKind; } + + void parse(); + + llvm::DenseMap> symbolDependencies; +}; + inline bool isBitcode(MemoryBufferRef mb) { return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; } diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 2f6dd6af2d030..ce41cdcb3e07f 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -32,9 +32,9 @@ static bool requiresGOTAccess(const Symbol *sym) { } static bool allowUndefined(const Symbol* sym) { - // Symbols with explicit import names are always allowed to be undefined at + // Symbols that are explicitly imported are always allowed to be undefined at // link time. 
- if (sym->importName) + if (sym->isImported()) return true; if (isa(sym) && config->importUndefined) return true; diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index 881b1231ffdf9..d33176a0fa54a 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -38,6 +38,13 @@ void SymbolTable::addFile(InputFile *file) { return; } + // stub file + if (auto *f = dyn_cast(file)) { + f->parse(); + stubFiles.push_back(f); + return; + } + if (config->trace) message(toString(file)); diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h index 5009e6039602b..ef2a023b68c44 100644 --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -102,6 +102,7 @@ class SymbolTable { DefinedFunction *createUndefinedStub(const WasmSignature &sig); std::vector objectFiles; + std::vector stubFiles; std::vector sharedFiles; std::vector bitcodeFiles; std::vector syntheticFunctions; diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 8864e840dd585..567ff49dfa444 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -221,6 +221,10 @@ void Symbol::setHidden(bool isHidden) { flags |= WASM_SYMBOL_VISIBILITY_DEFAULT; } +bool Symbol::isImported() const { + return isUndefined() && (importName.has_value() || forceImport); +} + bool Symbol::isExported() const { if (!isDefined() || isLocal()) return false; diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 16f1b535876e0..34fff4b962bdc 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -114,6 +114,7 @@ class Symbol { void setOutputSymbolIndex(uint32_t index); WasmSymbolType getWasmType() const; + bool isImported() const; bool isExported() const; bool isExportedExplicit() const; @@ -135,7 +136,8 @@ class Symbol { Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f) : name(name), file(f), symbolKind(k), referenced(!config->gcSections), requiresGOT(false), isUsedInRegularObj(false), forceExport(false), - canInline(false), traced(false), isStub(false), flags(flags) {} + forceImport(false), canInline(false), traced(false), isStub(false), + flags(flags) {} StringRef name; InputFile *file; @@ -160,6 +162,8 @@ class Symbol { // -e/--export command line flag) bool forceExport : 1; + bool forceImport : 1; + // False if LTO shouldn't inline whatever this symbol points to. If a symbol // is overwritten after LTO, LTO shouldn't inline the symbol because it // doesn't know the final contents of the symbol. @@ -661,6 +665,7 @@ T *replaceSymbol(Symbol *s, ArgT &&... arg) { T *s2 = new (s) T(std::forward(arg)...); s2->isUsedInRegularObj = symCopy.isUsedInRegularObj; s2->forceExport = symCopy.forceExport; + s2->forceImport = symCopy.forceImport; s2->canInline = symCopy.canInline; s2->traced = symCopy.traced; s2->referenced = symCopy.referenced; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 030ef7468791a..d9e87276b31b0 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -744,7 +744,7 @@ static bool shouldImport(Symbol *sym) { if (config->allowUndefinedSymbols.count(sym->getName()) != 0) return true; - return sym->importName.has_value(); + return sym->isImported(); } void Writer::calculateImports() { @@ -1709,7 +1709,7 @@ void Writer::run() { sym->forceExport = true; } - // Delay reporting error about explicit exports until after + // Delay reporting errors about explicit exports until after // addStartStopSymbols which can create optional symbols. 
for (auto &name : config->requiredExports) { Symbol *sym = symtab->find(name); diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig index b3e7dd4ee265a..3e9675c8c00f1 100644 --- a/lldb/bindings/python/python-typemaps.swig +++ b/lldb/bindings/python/python-typemaps.swig @@ -103,11 +103,11 @@ // typemap for a char buffer %typemap(in) (char *dst, size_t dst_len) { - if (!PyInt_Check($input)) { + if (!PyLong_Check($input)) { PyErr_SetString(PyExc_ValueError, "Expecting an integer"); SWIG_fail; } - $2 = PyInt_AsLong($input); + $2 = PyLong_AsLong($input); if ($2 <= 0) { PyErr_SetString(PyExc_ValueError, "Positive integer expected"); SWIG_fail; @@ -139,11 +139,11 @@ // typemap for handling an snprintf-like API like SBThread::GetStopDescription. %typemap(in) (char *dst_or_null, size_t dst_len) { - if (!PyInt_Check($input)) { + if (!PyLong_Check($input)) { PyErr_SetString(PyExc_ValueError, "Expecting an integer"); SWIG_fail; } - $2 = PyInt_AsLong($input); + $2 = PyLong_AsLong($input); if ($2 <= 0) { PyErr_SetString(PyExc_ValueError, "Positive integer expected"); SWIG_fail; @@ -205,9 +205,7 @@ // typemap for an incoming buffer // See also SBProcess::ReadMemory. %typemap(in) (void *buf, size_t size) { - if (PyInt_Check($input)) { - $2 = PyInt_AsLong($input); - } else if (PyLong_Check($input)) { + if (PyLong_Check($input)) { $2 = PyLong_AsLong($input); } else { PyErr_SetString(PyExc_ValueError, "Expecting an integer or long object"); @@ -258,9 +256,7 @@ template <> int32_t PyLongAsT(PyObject *obj) { } template bool SetNumberFromPyObject(T &number, PyObject *obj) { - if (PyInt_Check(obj)) - number = static_cast(PyInt_AsLong(obj)); - else if (PyLong_Check(obj)) + if (PyLong_Check(obj)) number = PyLongAsT(obj); else return false; @@ -345,7 +341,7 @@ template <> bool SetNumberFromPyObject(double &number, PyObject *obj) { count = $2; PyObject *list = PyList_New(count); for (uint32_t j = 0; j < count; j++) { - PyObject *item = PyInt_FromLong($1[j]); + PyObject *item = PyLong_FromLong($1[j]); int ok = PyList_SetItem(list, j, item); if (ok != 0) { $result = Py_None; diff --git a/lldb/packages/Python/lldbsuite/test/make/Android.rules b/lldb/packages/Python/lldbsuite/test/make/Android.rules index 32f786aa34756..cd7d8ae74d6bf 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Android.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Android.rules @@ -24,14 +24,6 @@ else ifeq "$(ARCH)" "i386" SYSROOT_ARCH := x86 STL_ARCH := x86 TRIPLE := i686-none-linux-android -else ifeq "$(ARCH)" "mips64r6" - SYSROOT_ARCH := mips64 - STL_ARCH := mips64 - TRIPLE := mips64el-none-linux-android -else ifeq "$(ARCH)" "mips32" - SYSROOT_ARCH := mips - STL_ARCH := mips - TRIPLE := mipsel-none-linux-android else SYSROOT_ARCH := $(ARCH) STL_ARCH := $(ARCH) diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index 25c4d88763326..4c225ed360be5 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -428,6 +428,16 @@ ifeq (1,$(USE_LIBCPP)) endif endif +ifeq (1, $(USE_SYSTEM_STDLIB)) + ifeq "$(OS)" "Darwin" + ifeq "$(SDKROOT)" "" + $(error "SDKROOT must be set on Darwin to use the system libcxx") + endif + CXXFLAGS += -nostdlib++ -nostdinc++ -cxx-isystem $(SDKROOT)/usr/include/c++/v1 + LDFLAGS += -L$(SDKROOT)/usr/lib -Wl,-rpath,$(SDKROOT)/usr/lib -lc++ + endif +endif + # If no explicit request was made, but we have paths to a custom 
libcxx, use # them. ifeq ($(or $(USE_LIBSTDCPP), $(USE_LIBCPP), $(USE_SYSTEM_STDLIB)),) diff --git a/lldb/scripts/lldb-test-qemu/run-qemu.sh b/lldb/scripts/lldb-test-qemu/run-qemu.sh old mode 100644 new mode 100755 index 339b8d955e613..d11711c10e772 --- a/lldb/scripts/lldb-test-qemu/run-qemu.sh +++ b/lldb/scripts/lldb-test-qemu/run-qemu.sh @@ -109,8 +109,12 @@ elif [[ "$ARCH" == "arm64" ]]; then QEMU_SVE_MAX_VQ=4 QEMU_CPU="cortex-a53" + if [[ $SVE ]] || [[ $MTE ]]; then + QEMU_CPU="max" + fi + if [[ $SVE ]]; then - QEMU_CPU="max,sve-max-vq=$QEMU_SVE_MAX_VQ" + QEMU_CPU="$QEMU_CPU,sve-max-vq=$QEMU_SVE_MAX_VQ" fi if [[ $MTE ]]; then QEMU_MACHINE="$QEMU_MACHINE,mte=on" diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 419a27acc8181..ed816195350e9 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -61,14 +61,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, OptionsWithRaw args{command}; StringRef expr = args.GetRawPart(); + if (expr.empty()) { + result.AppendErrorWithFormatv("'{0}' takes a variable or expression", + m_cmd_name); + return false; + } + if (args.HasArgs()) { if (!ParseOptionsAndNotify(args.GetArgs(), result, m_option_group, m_exe_ctx)) return false; - } else if (command.empty()) { - result.AppendErrorWithFormatv("'{0}' takes a variable or expression", - m_cmd_name); - return false; } // If the user has not specified, default to disabling persistent results. diff --git a/lldb/source/Commands/CommandOptionsProcessAttach.cpp b/lldb/source/Commands/CommandOptionsProcessAttach.cpp index f9bd92938fa1c..d3d864dfe0255 100644 --- a/lldb/source/Commands/CommandOptionsProcessAttach.cpp +++ b/lldb/source/Commands/CommandOptionsProcessAttach.cpp @@ -72,5 +72,5 @@ Status CommandOptionsProcessAttach::SetOptionValue( } llvm::ArrayRef CommandOptionsProcessAttach::GetDefinitions() { - return llvm::makeArrayRef(g_process_attach_options); + return llvm::ArrayRef(g_process_attach_options); } diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index f11c95e5660e2..ea917f78841bb 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -371,7 +371,7 @@ let Command = "expression" in { Arg<"Language">, Desc<"Specifies the Language to use when parsing the " "expression. If not set the target.language setting is used.">; def expression_options_apply_fixits : Option<"apply-fixits", "X">, - Groups<[1,2]>, Arg<"Language">, Desc<"If true, simple fix-it hints will be " + Groups<[1,2]>, Arg<"Boolean">, Desc<"If true, simple fix-it hints will be " "automatically applied to the expression.">; def expression_options_description_verbosity : Option<"description-verbosity", "v">, Group<1>, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 99a0152eaf6e6..c6873a5b7a09a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1319,7 +1319,7 @@ size_t SymbolFileDWARF::ParseBlocksRecursive( range.GetByteSize())); else { GetObjectFile()->GetModule()->ReportError( - "{0x:+8}: adding range [{1:x16}-{2:x16}) which has a base " + "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base " "that is less than the function's low PC {3:x16}. 
Please file " "a bug and attach the file at the " "start of this error message", diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp index da57338ffb58a..b1a882465c404 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp @@ -1299,6 +1299,15 @@ void PDBASTParser::AddRecordMembers( // Query the symbol's value as the variable initializer if valid. if (member_comp_type.IsConst()) { auto value = member->getValue(); + if (value.Type == llvm::pdb::Empty) { + LLDB_LOG(GetLog(LLDBLog::AST), + "Class '{0}' has member '{1}' of type '{2}' with an unknown " + "constant size.", + record_type.GetTypeName(), member_name, + member_comp_type.GetTypeName()); + continue; + } + clang::QualType qual_type = decl->getType(); unsigned type_width = m_ast.getASTContext().getIntWidth(qual_type); unsigned constant_width = value.getBitWidth(); diff --git a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py index 22d18f91d0a59..9f69895f43692 100644 --- a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py +++ b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py @@ -107,3 +107,10 @@ def test_expression_language(self): lldbutil.run_to_name_breakpoint(self, "main") self._expect_cmd(f"dwim-print -l c++ -- argc", "frame variable") self._expect_cmd(f"dwim-print -l c++ -- argc + 1", "expression") + + def test_empty_expression(self): + self.build() + lldbutil.run_to_name_breakpoint(self, "main") + error_msg = "error: 'dwim-print' takes a variable or expression" + self.expect(f"dwim-print", error=True, startstr=error_msg) + self.expect(f"dwim-print -- ", error=True, startstr=error_msg) diff --git a/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py b/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py index b63a09d047024..172c00eb59dc2 100644 --- a/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py +++ b/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py @@ -63,7 +63,9 @@ def inferior_crashing(self): # The exact stop reason depends on the platform if self.platformIsDarwin(): stop_reason = 'stop reason = EXC_BAD_ACCESS' - elif self.getPlatform() == "linux" or self.getPlatform() == "freebsd": + elif self.getPlatform() == "linux": + stop_reason = 'stop reason = signal SIGSEGV: address not mapped to object' + elif self.getPlatform() == "freebsd": stop_reason = 'stop reason = signal SIGSEGV' else: stop_reason = 'stop reason = invalid address' diff --git a/lldb/test/API/python_api/sbdata/TestSBData.py b/lldb/test/API/python_api/sbdata/TestSBData.py index 932781b9b1b0f..ba839590c1a36 100644 --- a/lldb/test/API/python_api/sbdata/TestSBData.py +++ b/lldb/test/API/python_api/sbdata/TestSBData.py @@ -387,12 +387,13 @@ def test_with_run_command(self): self.assert_data(data2.GetUnsignedInt8, 4, 111) self.assert_data(data2.GetUnsignedInt8, 5, 33) - data2.SetDataFromUInt64Array([1, 2, 3, 4, 5]) + data2.SetDataFromUInt64Array([1, 2, 3, 4, 5, 2**63]) self.assert_data(data2.GetUnsignedInt64, 0, 1) self.assert_data(data2.GetUnsignedInt64, 8, 2) self.assert_data(data2.GetUnsignedInt64, 16, 3) self.assert_data(data2.GetUnsignedInt64, 24, 4) self.assert_data(data2.GetUnsignedInt64, 32, 5) + self.assert_data(data2.GetUnsignedInt64, 40, 2**63) self.assertEqual( data2.uint64[0], 1, diff --git a/lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s 
b/lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s new file mode 100644 index 0000000000000..e3cc84db12652 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s @@ -0,0 +1,317 @@ +# REQUIRES: x86 + +# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t +# RUN: lldb-test symbols %t &> %t.txt +# RUN: cat %t.txt | FileCheck %s + +# Tests that error is printed correctly when DW_AT_low_pc value is +# greater then a range entry. + +# CHECK: 0x0000006e: adding range [0x0000000000000000-0x000000000000001f) +# CHECK-SAME: which has a base that is less than the function's low PC 0x0000000000000021. +# CHECK-SAME: Please file a bug and attach the file at the start of this error message + + + +# Test was manually modified to change DW_TAG_lexical_block +# to use DW_AT_ranges, and value lower then DW_AT_low_pc value +# in DW_TAG_subprogram +# static int foo(bool b) { +# if (b) { +# int food = 1; +# return food; +# } +# return 0; +# } +# int main() { +# return foo(true); +# } + .text + .file "main.cpp" + .section .text.main,"ax",@progbits + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 1 "base-lower-then-range-entry" "main.cpp" + .loc 1 8 0 # main.cpp:8:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $0, -4(%rbp) +.Ltmp0: + .loc 1 9 10 prologue_end # main.cpp:9:10 + movl $1, %edi + callq _ZL3foob + .loc 1 9 3 epilogue_begin is_stmt 0 # main.cpp:9:3 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .text._ZL3foob,"ax",@progbits + .p2align 4, 0x90 # -- Begin function _ZL3foob + .type _ZL3foob,@function +_ZL3foob: # @_ZL3foob +.Lfunc_begin1: + .loc 1 1 0 is_stmt 1 # main.cpp:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movb %dil, %al + andb $1, %al + movb %al, -5(%rbp) +.Ltmp2: + .loc 1 2 7 prologue_end # main.cpp:2:7 + testb $1, -5(%rbp) + je .LBB1_2 +# %bb.1: # %if.then +.Ltmp3: + .loc 1 3 8 # main.cpp:3:8 + movl $1, -12(%rbp) + .loc 1 4 12 # main.cpp:4:12 + movl -12(%rbp), %eax + .loc 1 4 5 is_stmt 0 # main.cpp:4:5 + movl %eax, -4(%rbp) + jmp .LBB1_3 +.Ltmp4: +.LBB1_2: # %if.end + .loc 1 6 3 is_stmt 1 # main.cpp:6:3 + movl $0, -4(%rbp) +.LBB1_3: # %return + .loc 1 7 1 # main.cpp:7:1 + movl -4(%rbp), %eax + .loc 1 7 1 epilogue_begin is_stmt 0 # main.cpp:7:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp5: +.Lfunc_end1: + .size _ZL3foob, .Lfunc_end1-_ZL3foob + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # 
DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 11 # DW_TAG_lexical_block + .byte 1 # DW_CHILDREN_yes + .byte 85 # DW_AT_ranges <------ Manually modified. Replaced low_pc/high)_pc with rangres. + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x8f DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 33 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad 0 # DW_AT_low_pc + .long .Ldebug_ranges0 # DW_AT_ranges + .byte 2 # Abbrev [2] 0x2a:0x19 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 8 # DW_AT_decl_line + .long 138 # DW_AT_type + # DW_AT_external + .byte 3 # Abbrev [3] 0x43:0x48 DW_TAG_subprogram + .quad .Lfunc_begin1 + 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string5 # DW_AT_linkage_name + .long .Linfo_string6 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 138 # DW_AT_type + .byte 4 # Abbrev [4] 0x60:0xe DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 123 + .long .Linfo_string7 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + .long 138 # DW_AT_type + .byte 5 # Abbrev [5] 0x6e:0x1c DW_TAG_lexical_block + .long .Ldebug_ranges0 # DW_AT_ranges <-- Manually modified replaced low_pc/high_pc to rangres. + .byte 6 # Abbrev [6] 0x7b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .long .Linfo_string9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 3 # DW_AT_decl_line + .long 138 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x8b:0x7 DW_TAG_base_type + .long .Linfo_string4 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x92:0x7 DW_TAG_base_type + .long .Linfo_string8 # DW_AT_name + .byte 2 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 73027ae39b1492e5b6033358a13b86d7d1e781ae)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "base-lower-then-range-entry" # string offset=114 +.Linfo_string3: + .asciz "main" # string offset=179 +.Linfo_string4: + .asciz "int" # string offset=184 +.Linfo_string5: + .asciz "_ZL3foob" # string offset=188 +.Linfo_string6: + .asciz "foo" # string offset=197 +.Linfo_string7: + .asciz "b" # string offset=201 +.Linfo_string8: + .asciz "bool" # string offset=203 +.Linfo_string9: + .asciz "food" # string offset=208 + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 73027ae39b1492e5b6033358a13b86d7d1e781ae)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _ZL3foob + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/lldb/tools/lldb-instr/CMakeLists.txt b/lldb/tools/lldb-instr/CMakeLists.txt index a1bbd7e2b7c93..8da453b2894fd 100644 --- a/lldb/tools/lldb-instr/CMakeLists.txt +++ b/lldb/tools/lldb-instr/CMakeLists.txt @@ -11,4 +11,6 @@ add_lldb_tool(lldb-instr clangSerialization clangTooling + LINK_COMPONENTS + Support ) diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index 67103e87a1d4a..56da4c8b56807 
100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -7,20 +7,29 @@ set(LLDB_PLUGINS) if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") list(APPEND LLDB_PLUGINS lldbPluginProcessLinux) + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + list(APPEND LLDB_PLUGINS lldbPluginPlatformLinux) + else() + list(APPEND LLDB_PLUGINS lldbPluginPlatformAndroid) + endif() endif() if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") list(APPEND LLDB_PLUGINS lldbPluginProcessFreeBSD) + list(APPEND LLDB_PLUGINS lldbPluginPlatformFreeBSD) endif() if(CMAKE_SYSTEM_NAME MATCHES "NetBSD") list(APPEND LLDB_PLUGINS lldbPluginProcessNetBSD) + list(APPEND LLDB_PLUGINS lldbPluginPlatformNetBSD) endif() if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND LLDB_PLUGINS lldbPluginObjectFileMachO) + list(APPEND LLDB_PLUGINS lldbPluginPlatformMacOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") list(APPEND LLDB_PLUGINS lldbPluginObjectFilePECOFF) + list(APPEND LLDB_PLUGINS lldbPluginPlatformWindows) else() list(APPEND LLDB_PLUGINS lldbPluginObjectFileELF) endif() diff --git a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp index 4233252a84dfc..1909ea4dc7984 100644 --- a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp +++ b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp @@ -11,12 +11,29 @@ #if defined(__APPLE__) #include "Plugins/ObjectFile/Mach-O/ObjectFileMachO.h" using HostObjectFile = ObjectFileMachO; +#include "Plugins/Platform/MacOSX/PlatformMacOSX.h" +using HostPlatform = lldb_private::PlatformMacOSX; #elif defined(_WIN32) #include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h" using HostObjectFile = ObjectFilePECOFF; +#include "Plugins/Platform/Windows/PlatformWindows.h" +using HostPlatform = lldb_private::PlatformWindows; #else #include "Plugins/ObjectFile/ELF/ObjectFileELF.h" using HostObjectFile = ObjectFileELF; +#if defined(__ANDROID__) +#include "Plugins/Platform/Android/PlatformAndroid.h" +using HostPlatform = lldb_private::platform_android::PlatformAndroid; +#elif defined(__FreeBSD__) +#include "Plugins/Platform/FreeBSD/PlatformFreeBSD.h" +using HostPlatform = lldb_private::platform_freebsd::PlatformFreeBSD; +#elif defined(__linux__) +#include "Plugins/Platform/Linux/PlatformLinux.h" +using HostPlatform = lldb_private::platform_linux::PlatformLinux; +#elif defined(__NetBSD__) +#include "Plugins/Platform/NetBSD/PlatformNetBSD.h" +using HostPlatform = lldb_private::platform_netbsd::PlatformNetBSD; +#endif #endif #if defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64) @@ -58,6 +75,7 @@ llvm::Error SystemInitializerLLGS::Initialize() { return e; HostObjectFile::Initialize(); + HostPlatform::Initialize(); #if defined(LLDB_TARGET_ARM) || defined(LLDB_TARGET_ARM64) EmulateInstructionARM::Initialize(); @@ -80,6 +98,7 @@ llvm::Error SystemInitializerLLGS::Initialize() { void SystemInitializerLLGS::Terminate() { HostObjectFile::Terminate(); + HostPlatform::Terminate(); #if defined(LLDB_TARGET_ARM) || defined(LLDB_TARGET_ARM64) EmulateInstructionARM::Terminate(); diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 429b59a8d9404..ffd1028cedd80 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -15,9 +15,9 @@ supported variations of the RISC-V specification. It lives in the Base ISAs ========= -The specification defines four base instruction sets: RV32I, RV32E, RV64I, -and RV128I. Currently, LLVM fully supports RV32I, and RV64I. RV32E is -supported by the assembly-based tools only. 
RV128I is not supported. +The specification defines five base instruction sets: RV32I, RV32E, RV64I, +RV64E, and RV128I. Currently, LLVM fully supports RV32I, and RV64I. RV32E and +RV64E are supported by the assembly-based tools only. RV128I is not supported. To specify the target triple: @@ -27,7 +27,7 @@ To specify the target triple: Architecture Description ============ ============================================================== ``riscv32`` RISC-V with XLEN=32 (i.e. RV32I or RV32E) - ``riscv64`` RISC-V with XLEN=64 (i.e. RV64I) + ``riscv64`` RISC-V with XLEN=64 (i.e. RV64I or RV64E) ============ ============================================================== To select an E variant ISA (e.g. RV32E instead of RV32I), use the base diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index d87d20704f166..6f78497644479 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -122,7 +122,11 @@ Changes to the MIPS Backend Changes to the PowerPC Backend ------------------------------ -* ... +* A new option ``-mroptr`` is added to ``clang`` and ``llc``. When this option + is present, constant objects with relocatable address values are put into the + RO data section. This option should be used with the ``-fdata-sections`` + option, and is not supported with ``-fno-data-sections``. The option is + only supported on AIX. Changes to the RISC-V Backend ----------------------------- @@ -144,6 +148,7 @@ Changes to the RISC-V Backend * Adds support for the vendor-defined XTHeadCmo (cache management operations) extension. * Adds support for the vendor-defined XTHeadSync (multi-core synchronization instructions) extension. * Added support for the vendor-defined XTHeadFMemIdx (indexed memory operations for floating point) extension. +* Assembler support for RV64E was added. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/docs/ScudoHardenedAllocator.rst b/llvm/docs/ScudoHardenedAllocator.rst index 9c1cfa6edcd63..875d018c4d9ff 100644 --- a/llvm/docs/ScudoHardenedAllocator.rst +++ b/llvm/docs/ScudoHardenedAllocator.rst @@ -265,7 +265,16 @@ The following "mallopt" options are available (options are defined in | | the interval to the minimum and maximum value as | | | specified at compile time). | +---------------------------+-------------------------------------------------------+ -| M_PURGE | Forces immediate memory reclaiming (value is unused). | +| M_PURGE | Forces immediate memory reclaiming but does not | +| | reclaim everything. For smaller size classes, there | +| | is still some memory that is not reclaimed due to the | +| | extra time it takes and the small amount of memory | +| | that can be reclaimed. | +| | The value is ignored. | ++---------------------------+-------------------------------------------------------+ +| M_PURGE_ALL | Same as M_PURGE but will force release all possible | +| | memory regardless of how long it takes. | +| | The value is ignored. 
| +---------------------------+-------------------------------------------------------+ | M_MEMTAG_TUNING | Tunes the allocator's choice of memory tags to make | | | it more likely that a certain class of memory errors | diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 861fa3b20a495..826bd45d8057b 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -302,8 +302,9 @@ Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); -/// Given a callsite, fold the result or return null. -Value *simplifyCall(CallBase *Call, const SimplifyQuery &Q); +/// Given a callsite, callee, and arguments, fold the result or return null. +Value *simplifyCall(CallBase *Call, Value *Callee, ArrayRef Args, + const SimplifyQuery &Q); /// Given a constrained FP intrinsic call, tries to compute its simplified /// version. Returns a simplified result or null. diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 475d87bdd5b13..19b466629dbfc 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -143,6 +143,8 @@ unsigned getAlignLoops(); bool getJMCInstrument(); +bool getXCOFFReadOnlyPointers(); + /// Create this object with static storage to register codegen-related command /// line options. struct RegisterCodeGenFlags { diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index ce447be3af41f..fc4e5ca756248 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -101,8 +101,9 @@ class MachineRegisterInfo { /// first member of the pair being non-zero. If the hinted register is /// virtual, it means the allocator should prefer the physical register /// allocated to it if any. - IndexedMap>, - VirtReg2IndexFunctor> RegAllocHints; + IndexedMap>, + VirtReg2IndexFunctor> + RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -818,27 +819,25 @@ class MachineRegisterInfo { /// getRegAllocationHint - Return the register allocation hint for the /// specified virtual register. If there are many hints, this returns the /// one with the greatest weight. - std::pair - getRegAllocationHint(Register VReg) const { + std::pair getRegAllocationHint(Register VReg) const { assert(VReg.isVirtual()); Register BestHint = (RegAllocHints[VReg.id()].second.size() ? RegAllocHints[VReg.id()].second[0] : Register()); - return std::pair(RegAllocHints[VReg.id()].first, - BestHint); + return {RegAllocHints[VReg.id()].first, BestHint}; } /// getSimpleHint - same as getRegAllocationHint except it will only return /// a target independent hint. Register getSimpleHint(Register VReg) const { assert(VReg.isVirtual()); - std::pair Hint = getRegAllocationHint(VReg); + std::pair Hint = getRegAllocationHint(VReg); return Hint.first ? Register() : Hint.second; } /// getRegAllocationHints - Return a reference to the vector of all /// register allocation hints for VReg. 
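As a rough illustration of the reshaped hint accessors, the (hint type, register) pair can be consumed like this; a sketch only, with a hypothetical allocator helper, not code from this patch::

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Sketch: prefer a target-independent physical-register hint for VReg.
    static Register preferredHint(const MachineRegisterInfo &MRI, Register VReg) {
      std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VReg);
      // A hint type of 0 is target independent; getSimpleHint() applies the same rule.
      if (Hint.first == 0 && Hint.second.isPhysical())
        return Hint.second;
      return Register(); // no usable hint
    }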
- const std::pair> - &getRegAllocationHints(Register VReg) const { + const std::pair> & + getRegAllocationHints(Register VReg) const { assert(VReg.isVirtual()); return RegAllocHints[VReg]; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index de9902ae2ebcb..5c01dad848fd2 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -355,6 +355,7 @@ class DWARFDebugLine { private: DWARFUnit *prepareToParse(uint64_t Offset); void moveToNextTable(uint64_t OldOffset, const Prologue &P); + bool hasValidVersion(uint64_t Offset); LineToUnitMap LineToUnit; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 2b6696c7fdffd..464b21d536300 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -367,13 +367,15 @@ inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, Block &B) { // must end with a zero, and contain no zeros before the end. bool isCStringBlock(Block &B); -/// Describes symbol linkage. This can be used to make resolve definition -/// clashes. +/// Describes symbol linkage. This can be used to resolve definition clashes. enum class Linkage : uint8_t { Strong, Weak, }; +/// Holds target-specific properties for a symbol. +using TargetFlagsType = uint8_t; + /// For errors and debugging output. const char *getLinkageName(Linkage L); @@ -611,6 +613,17 @@ class Symbol { this->S = static_cast(S); } + /// Check wehther the given target flags are set for this Symbol. + bool hasTargetFlags(TargetFlagsType Flags) const { + return static_cast(TargetFlags) & Flags; + } + + /// Set the target flags for this Symbol. + void setTargetFlags(TargetFlagsType Flags) { + assert(Flags <= 1 && "Add more bits to store more than single flag"); + TargetFlags = Flags; + } + /// Returns true if this is a weakly referenced external symbol. /// This method may only be called on external symbols. bool isWeaklyReferenced() const { @@ -655,12 +668,13 @@ class Symbol { // FIXME: A char* or SymbolStringPtr may pack better. StringRef Name; Addressable *Base = nullptr; - uint64_t Offset : 58; + uint64_t Offset : 57; uint64_t L : 1; uint64_t S : 2; uint64_t IsLive : 1; uint64_t IsCallable : 1; uint64_t WeakRef : 1; + uint64_t TargetFlags : 1; size_t Size = 0; }; @@ -713,6 +727,9 @@ class Section { /// Returns the ordinal for this section. SectionOrdinal getOrdinal() const { return SecOrdinal; } + /// Returns true if this section is empty (contains no blocks or symbols). + bool empty() const { return Blocks.empty(); } + /// Returns an iterator over the blocks defined in this section. iterator_range blocks() { return make_range(Blocks.begin(), Blocks.end()); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a13f8528fa6dd..acf91a2d35c41 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -27,6 +27,7 @@ namespace llvm { class CanonicalLoopInfo; struct TargetRegionEntryInfo; class OffloadEntriesInfoManager; +class OpenMPIRBuilder; /// Move the instruction after an InsertPoint to the beginning of another /// BasicBlock. @@ -160,6 +161,251 @@ class OpenMPIRBuilderConfig { void setSeparator(StringRef S) { Separator = S; } }; +/// Data structure to contain the information needed to uniquely identify +/// a target entry. 
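To show how the new per-symbol target flags in JITLink are meant to be used, here is a small sketch; the flag name and value are made up for illustration (each backend defines its own), and this patch reserves only a single storage bit::

    #include "llvm/ExecutionEngine/JITLink/JITLink.h"
    using namespace llvm::jitlink;

    // Hypothetical backend-defined flag; must fit the single bit available here.
    constexpr TargetFlagsType ExampleThumbFlag = 1 << 0;

    void markAndQuery(Symbol &Sym) {
      Sym.setTargetFlags(ExampleThumbFlag);      // record the target-specific property
      if (Sym.hasTargetFlags(ExampleThumbFlag))  // query it later during linking
        ; // backend-specific handling goes here
    }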
+struct TargetRegionEntryInfo { + std::string ParentName; + unsigned DeviceID; + unsigned FileID; + unsigned Line; + unsigned Count; + + TargetRegionEntryInfo() + : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} + TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, + unsigned FileID, unsigned Line, unsigned Count = 0) + : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), + Count(Count) {} + + static void getTargetRegionEntryFnName(SmallVectorImpl &Name, + StringRef ParentName, + unsigned DeviceID, unsigned FileID, + unsigned Line, unsigned Count); + + bool operator<(const TargetRegionEntryInfo RHS) const { + return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < + std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, + RHS.Count); + } +}; + +/// Class that manages information about offload code regions and data +class OffloadEntriesInfoManager { + /// Number of entries registered so far. + OpenMPIRBuilder *OMPBuilder; + unsigned OffloadingEntriesNum = 0; + +public: + /// Base class of the entries info. + class OffloadEntryInfo { + public: + /// Kind of a given entry. + enum OffloadingEntryInfoKinds : unsigned { + /// Entry is a target region. + OffloadingEntryInfoTargetRegion = 0, + /// Entry is a declare target variable. + OffloadingEntryInfoDeviceGlobalVar = 1, + /// Invalid entry info. + OffloadingEntryInfoInvalid = ~0u + }; + + protected: + OffloadEntryInfo() = delete; + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, + uint32_t Flags) + : Flags(Flags), Order(Order), Kind(Kind) {} + ~OffloadEntryInfo() = default; + + public: + bool isValid() const { return Order != ~0u; } + unsigned getOrder() const { return Order; } + OffloadingEntryInfoKinds getKind() const { return Kind; } + uint32_t getFlags() const { return Flags; } + void setFlags(uint32_t NewFlags) { Flags = NewFlags; } + Constant *getAddress() const { return cast_or_null(Addr); } + void setAddress(Constant *V) { + assert(!Addr.pointsToAliveValue() && "Address has been set before!"); + Addr = V; + } + static bool classof(const OffloadEntryInfo *Info) { return true; } + + private: + /// Address of the entity that has to be mapped for offloading. + WeakTrackingVH Addr; + + /// Flags associated with the device global. + uint32_t Flags = 0u; + + /// Order this entry was emitted. + unsigned Order = ~0u; + + OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; + }; + + /// Return true if a there are no entries defined. + bool empty() const; + /// Return number of entries defined so far. + unsigned size() const { return OffloadingEntriesNum; } + + OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {} + + // + // Target region entries related. + // + + /// Kind of the target registry entry. + enum OMPTargetRegionEntryKind : uint32_t { + /// Mark the entry as target region. + OMPTargetRegionEntryTargetRegion = 0x0, + /// Mark the entry as a global constructor. + OMPTargetRegionEntryCtor = 0x02, + /// Mark the entry as a global destructor. + OMPTargetRegionEntryDtor = 0x04, + }; + + /// Target region entries info. + class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { + /// Address that can be used as the ID of the entry. 
+ Constant *ID = nullptr; + + public: + OffloadEntryInfoTargetRegion() + : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} + explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, + Constant *ID, + OMPTargetRegionEntryKind Flags) + : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), + ID(ID) { + setAddress(Addr); + } + + Constant *getID() const { return ID; } + void setID(Constant *V) { + assert(!ID && "ID has been set before!"); + ID = V; + } + static bool classof(const OffloadEntryInfo *Info) { + return Info->getKind() == OffloadingEntryInfoTargetRegion; + } + }; + + /// Initialize target region entry. + /// This is ONLY needed for DEVICE compilation. + void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, + unsigned Order); + /// Register target region entry. + void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, + Constant *Addr, Constant *ID, + OMPTargetRegionEntryKind Flags); + /// Return true if a target region entry with the provided information + /// exists. + bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, + bool IgnoreAddressId = false) const; + + // Return the Name based on \a EntryInfo using the next available Count. + void getTargetRegionEntryFnName(SmallVectorImpl &Name, + const TargetRegionEntryInfo &EntryInfo); + + /// brief Applies action \a Action on all registered entries. + typedef function_ref + OffloadTargetRegionEntryInfoActTy; + void + actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); + + // + // Device global variable entries related. + // + + /// Kind of the global variable entry.. + enum OMPTargetGlobalVarEntryKind : uint32_t { + /// Mark the entry as a to declare target. + OMPTargetGlobalVarEntryTo = 0x0, + /// Mark the entry as a to declare target link. + OMPTargetGlobalVarEntryLink = 0x1, + }; + + /// Device global variable entries info. + class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { + /// Type of the global variable. + int64_t VarSize; + GlobalValue::LinkageTypes Linkage; + + public: + OffloadEntryInfoDeviceGlobalVar() + : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} + explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, + OMPTargetGlobalVarEntryKind Flags) + : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} + explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, + int64_t VarSize, + OMPTargetGlobalVarEntryKind Flags, + GlobalValue::LinkageTypes Linkage) + : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), + VarSize(VarSize), Linkage(Linkage) { + setAddress(Addr); + } + + int64_t getVarSize() const { return VarSize; } + void setVarSize(int64_t Size) { VarSize = Size; } + GlobalValue::LinkageTypes getLinkage() const { return Linkage; } + void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } + static bool classof(const OffloadEntryInfo *Info) { + return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; + } + }; + + /// Initialize device global variable entry. + /// This is ONLY used for DEVICE compilation. + void initializeDeviceGlobalVarEntryInfo(StringRef Name, + OMPTargetGlobalVarEntryKind Flags, + unsigned Order); + + /// Register device global variable entry. + void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, + int64_t VarSize, + OMPTargetGlobalVarEntryKind Flags, + GlobalValue::LinkageTypes Linkage); + /// Checks if the variable with the given name has been registered already. 
+ bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { + return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; + } + /// Applies action \a Action on all registered entries. + typedef function_ref + OffloadDeviceGlobalVarEntryInfoActTy; + void actOnDeviceGlobalVarEntriesInfo( + const OffloadDeviceGlobalVarEntryInfoActTy &Action); + +private: + /// Return the count of entries at a particular source location. + unsigned + getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; + + /// Update the count of entries at a particular source location. + void + incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); + + static TargetRegionEntryInfo + getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { + return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, + EntryInfo.FileID, EntryInfo.Line, 0); + } + + // Count of entries at a location. + std::map OffloadEntriesTargetRegionCount; + + // Storage for target region entries kind. + typedef std::map + OffloadEntriesTargetRegionTy; + OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; + /// Storage for device global variable entries kind. The storage is to be + /// indexed by mangled name. + typedef StringMap + OffloadEntriesDeviceGlobalVarTy; + OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; +}; + /// An interface to create LLVM-IR for OpenMP directives. /// /// Each OpenMP directive has a corresponding public generator method. @@ -167,7 +413,8 @@ class OpenMPIRBuilder { public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize) - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M) + : M(M), Builder(M.getContext()), OffloadInfoManager(this) {} ~OpenMPIRBuilder(); /// Initialize the internal state, this will put structures types and @@ -1063,6 +1310,9 @@ class OpenMPIRBuilder { /// Map to remember existing ident_t*. DenseMap, Constant *> IdentMap; + /// Info manager to keep track of target regions. + OffloadEntriesInfoManager OffloadInfoManager; + /// Helper that contains information about regions we need to outline /// during finalization. struct OutlineInfo { @@ -1231,7 +1481,6 @@ class OpenMPIRBuilder { // // We only generate metadata for function that contain target regions. 
void createOffloadEntriesAndInfoMetadata( - OffloadEntriesInfoManager &OffloadEntriesInfoManager, EmitMetadataErrorReportFunctionTy &ErrorReportFunction); public: @@ -1531,8 +1780,7 @@ class OpenMPIRBuilder { /// \param NumThreads Number default threads /// \param OutlinedFunction Pointer to the outlined function /// \param EntryFnIDName Name of the ID o be created - void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager, - TargetRegionEntryInfo &EntryInfo, + void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, @@ -1548,8 +1796,7 @@ class OpenMPIRBuilder { /// \param EntryFnIDName Name of the ID o be created /// \param NumTeams Number default teams /// \param NumThreads Number default threads - Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager, - TargetRegionEntryInfo &EntryInfo, + Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName, @@ -1918,10 +2165,7 @@ class OpenMPIRBuilder { /// /// \param M Module to load Metadata info from. Module passed maybe /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module. - /// \param OffloadEntriesInfoManager Initialize Offload Entry information. - void - loadOffloadInfoMetadata(Module &M, - OffloadEntriesInfoManager &OffloadEntriesInfoManager); + void loadOffloadInfoMetadata(Module &M); /// Gets (if variable with the given name already exist) or creates /// internal global variable with the specified Name. The created variable has @@ -1933,253 +2177,6 @@ class OpenMPIRBuilder { unsigned AddressSpace = 0); }; -/// Data structure to contain the information needed to uniquely identify -/// a target entry. -struct TargetRegionEntryInfo { - std::string ParentName; - unsigned DeviceID; - unsigned FileID; - unsigned Line; - unsigned Count; - - TargetRegionEntryInfo() - : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} - TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, - unsigned FileID, unsigned Line, unsigned Count = 0) - : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), - Count(Count) {} - - static void getTargetRegionEntryFnName(SmallVectorImpl &Name, - StringRef ParentName, - unsigned DeviceID, unsigned FileID, - unsigned Line, unsigned Count); - - bool operator<(const TargetRegionEntryInfo RHS) const { - return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < - std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, - RHS.Count); - } -}; - -/// Class that manages information about offload code regions and data -class OffloadEntriesInfoManager { - /// Number of entries registered so far. - OpenMPIRBuilderConfig Config; - unsigned OffloadingEntriesNum = 0; - -public: - void setConfig(OpenMPIRBuilderConfig C) { Config = C; } - - /// Base class of the entries info. - class OffloadEntryInfo { - public: - /// Kind of a given entry. - enum OffloadingEntryInfoKinds : unsigned { - /// Entry is a target region. - OffloadingEntryInfoTargetRegion = 0, - /// Entry is a declare target variable. - OffloadingEntryInfoDeviceGlobalVar = 1, - /// Invalid entry info. 
- OffloadingEntryInfoInvalid = ~0u - }; - - protected: - OffloadEntryInfo() = delete; - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, - uint32_t Flags) - : Flags(Flags), Order(Order), Kind(Kind) {} - ~OffloadEntryInfo() = default; - - public: - bool isValid() const { return Order != ~0u; } - unsigned getOrder() const { return Order; } - OffloadingEntryInfoKinds getKind() const { return Kind; } - uint32_t getFlags() const { return Flags; } - void setFlags(uint32_t NewFlags) { Flags = NewFlags; } - Constant *getAddress() const { return cast_or_null(Addr); } - void setAddress(Constant *V) { - assert(!Addr.pointsToAliveValue() && "Address has been set before!"); - Addr = V; - } - static bool classof(const OffloadEntryInfo *Info) { return true; } - - private: - /// Address of the entity that has to be mapped for offloading. - WeakTrackingVH Addr; - - /// Flags associated with the device global. - uint32_t Flags = 0u; - - /// Order this entry was emitted. - unsigned Order = ~0u; - - OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; - }; - - /// Return true if a there are no entries defined. - bool empty() const; - /// Return number of entries defined so far. - unsigned size() const { return OffloadingEntriesNum; } - - OffloadEntriesInfoManager() : Config() {} - - // - // Target region entries related. - // - - /// Kind of the target registry entry. - enum OMPTargetRegionEntryKind : uint32_t { - /// Mark the entry as target region. - OMPTargetRegionEntryTargetRegion = 0x0, - /// Mark the entry as a global constructor. - OMPTargetRegionEntryCtor = 0x02, - /// Mark the entry as a global destructor. - OMPTargetRegionEntryDtor = 0x04, - }; - - /// Target region entries info. - class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { - /// Address that can be used as the ID of the entry. - Constant *ID = nullptr; - - public: - OffloadEntryInfoTargetRegion() - : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} - explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, - Constant *ID, - OMPTargetRegionEntryKind Flags) - : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), - ID(ID) { - setAddress(Addr); - } - - Constant *getID() const { return ID; } - void setID(Constant *V) { - assert(!ID && "ID has been set before!"); - ID = V; - } - static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OffloadingEntryInfoTargetRegion; - } - }; - - /// Initialize target region entry. - /// This is ONLY needed for DEVICE compilation. - void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, - unsigned Order); - /// Register target region entry. - void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, - Constant *Addr, Constant *ID, - OMPTargetRegionEntryKind Flags); - /// Return true if a target region entry with the provided information - /// exists. - bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, - bool IgnoreAddressId = false) const; - - // Return the Name based on \a EntryInfo using the next available Count. - void getTargetRegionEntryFnName(SmallVectorImpl &Name, - const TargetRegionEntryInfo &EntryInfo); - - /// brief Applies action \a Action on all registered entries. - typedef function_ref - OffloadTargetRegionEntryInfoActTy; - void - actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); - - // - // Device global variable entries related. 
- // - - /// Kind of the global variable entry.. - enum OMPTargetGlobalVarEntryKind : uint32_t { - /// Mark the entry as a to declare target. - OMPTargetGlobalVarEntryTo = 0x0, - /// Mark the entry as a to declare target link. - OMPTargetGlobalVarEntryLink = 0x1, - }; - - /// Device global variable entries info. - class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { - /// Type of the global variable. - int64_t VarSize; - GlobalValue::LinkageTypes Linkage; - - public: - OffloadEntryInfoDeviceGlobalVar() - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} - explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, - OMPTargetGlobalVarEntryKind Flags) - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} - explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, - int64_t VarSize, - OMPTargetGlobalVarEntryKind Flags, - GlobalValue::LinkageTypes Linkage) - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), - VarSize(VarSize), Linkage(Linkage) { - setAddress(Addr); - } - - int64_t getVarSize() const { return VarSize; } - void setVarSize(int64_t Size) { VarSize = Size; } - GlobalValue::LinkageTypes getLinkage() const { return Linkage; } - void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } - static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; - } - }; - - /// Initialize device global variable entry. - /// This is ONLY used for DEVICE compilation. - void initializeDeviceGlobalVarEntryInfo(StringRef Name, - OMPTargetGlobalVarEntryKind Flags, - unsigned Order); - - /// Register device global variable entry. - void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, - int64_t VarSize, - OMPTargetGlobalVarEntryKind Flags, - GlobalValue::LinkageTypes Linkage); - /// Checks if the variable with the given name has been registered already. - bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { - return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; - } - /// Applies action \a Action on all registered entries. - typedef function_ref - OffloadDeviceGlobalVarEntryInfoActTy; - void actOnDeviceGlobalVarEntriesInfo( - const OffloadDeviceGlobalVarEntryInfoActTy &Action); - -private: - /// Return the count of entries at a particular source location. - unsigned - getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; - - /// Update the count of entries at a particular source location. - void - incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); - - static TargetRegionEntryInfo - getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { - return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, - EntryInfo.FileID, EntryInfo.Line, 0); - } - - // Count of entries at a location. - std::map OffloadEntriesTargetRegionCount; - - // Storage for target region entries kind. - typedef std::map - OffloadEntriesTargetRegionTy; - OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; - /// Storage for device global variable entries kind. The storage is to be - /// indexed by mangled name. - typedef StringMap - OffloadEntriesDeviceGlobalVarTy; - OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; -}; - /// Class to represented the control flow structure of an OpenMP canonical loop. 
/// /// The control-flow structure is standardized for easy consumption by diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 9cc56ecf8e970..baa4bac8c8e14 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -111,8 +111,12 @@ class ConstantInt final : public ConstantData { /// either getSExtValue() or getZExtValue() will yield a correctly sized and /// signed value for the type Ty. /// Get a ConstantInt for a specific signed value. - static ConstantInt *getSigned(IntegerType *Ty, int64_t V); - static Constant *getSigned(Type *Ty, int64_t V); + static ConstantInt *getSigned(IntegerType *Ty, int64_t V) { + return get(Ty, V, true); + } + static Constant *getSigned(Type *Ty, int64_t V) { + return get(Ty, V, true); + } /// Return a ConstantInt with the specified value and an implied Type. The /// type is the integer type that corresponds to the bit width of the value. diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index a8095a5c2fd30..c13a783e86c7a 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -68,7 +68,7 @@ class LLVMContext { public: LLVMContextImpl *const pImpl; LLVMContext(); - LLVMContext(LLVMContext &) = delete; + LLVMContext(const LLVMContext &) = delete; LLVMContext &operator=(const LLVMContext &) = delete; ~LLVMContext(); diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index bd542bd0d2b2b..39165453de16b 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -78,7 +78,7 @@ class MDBuilder { MDNode *createFunctionSectionPrefix(StringRef Prefix); /// Return metadata containing the pseudo probe descriptor for a function. - MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F); + MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName); /// Return metadata containing llvm statistics. MDNode * diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 18853102799b4..0c178ccef3bbb 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -988,12 +988,22 @@ class FunctionSummary : public GlobalValueSummary { return {}; } + CallsitesTy &mutableCallsites() { + assert(Callsites); + return *Callsites; + } + ArrayRef allocs() const { if (Allocs) return *Allocs; return {}; } + AllocsTy &mutableAllocs() { + assert(Allocs); + return *Allocs; + } + friend struct GraphTraits; }; diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index aa39954d62868..f5891b24ae4b4 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -645,7 +645,7 @@ class MCStreamer { /// relocation table for one or more symbols. /// /// \param Sym - The symbol on the .ref directive. - virtual void emitXCOFFRefDirective(StringRef Sym); + virtual void emitXCOFFRefDirective(const MCSymbol *Symbol); /// Emit an ELF .size directive. 
/// diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h index a437faeccbff4..aea2a3265d572 100644 --- a/llvm/include/llvm/MC/MCXCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h @@ -31,10 +31,7 @@ class MCXCOFFStreamer : public MCObjectStreamer { void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage, MCSymbolAttr Visibility) override; - void emitXCOFFRefDirective(StringRef Name) override { - report_fatal_error("emitXCOFFRefDirective is not implemented yet on object" - "generation path"); - } + void emitXCOFFRefDirective(const MCSymbol *Symbol) override; void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override { report_fatal_error("emitXCOFFRenameDirective is not implemented yet on " diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 22e811653c6d4..76e4248088afd 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -130,13 +130,12 @@ namespace llvm { HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), - DisableIntegratedAS(false), - RelaxELFRelocations(true), FunctionSections(false), - DataSections(false), IgnoreXCOFFVisibility(false), - XCOFFTracebackTable(true), UniqueSectionNames(true), - UniqueBasicBlockSectionNames(false), TrapUnreachable(false), - NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), - ExplicitEmulatedTLS(false), EnableIPRA(false), + DisableIntegratedAS(false), RelaxELFRelocations(true), + FunctionSections(false), DataSections(false), + IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true), + UniqueSectionNames(true), UniqueBasicBlockSectionNames(false), + TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), + EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), @@ -144,7 +143,7 @@ namespace llvm { ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), DebugStrictDwarf(false), Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false), - EnableCFIFixup(false), MisExpect(false), + EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} /// DisableFramePointerElim - This returns true if frame pointer elimination @@ -360,6 +359,10 @@ namespace llvm { /// By default, it is set to false unsigned MisExpect : 1; + /// When set to true, const objects with relocatable address values are put + /// into the RO data section. + unsigned XCOFFReadOnlyPointers : 1; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. 
diff --git a/llvm/include/llvm/TextAPI/Platform.h b/llvm/include/llvm/TextAPI/Platform.h index d4225ca533fc0..834f833306d1b 100644 --- a/llvm/include/llvm/TextAPI/Platform.h +++ b/llvm/include/llvm/TextAPI/Platform.h @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/VersionTuple.h" namespace llvm { namespace MachO { @@ -27,6 +28,7 @@ StringRef getPlatformName(PlatformType Platform); PlatformType getPlatformFromName(StringRef Name); std::string getOSAndEnvironmentName(PlatformType Platform, std::string Version = ""); +VersionTuple mapToSupportedOSVersion(const Triple &Triple); } // end namespace MachO. } // end namespace llvm. diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h index dc0e4f92ae802..0ab2783fc60c5 100644 --- a/llvm/include/llvm/TextAPI/Target.h +++ b/llvm/include/llvm/TextAPI/Target.h @@ -33,7 +33,7 @@ class Target { : Arch(Arch), Platform(Platform), MinDeployment(MinDeployment) {} explicit Target(const llvm::Triple &Triple) : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformType(Triple)), - MinDeployment(Triple.getOSVersion()) {} + MinDeployment(mapToSupportedOSVersion(Triple)) {} static llvm::Expected create(StringRef Target); diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h index 56e56ed67f7df..475ea48cca932 100644 --- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h +++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h @@ -19,9 +19,12 @@ #include "llvm/ADT/StringSet.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/PassManager.h" +#include namespace llvm { +class GlobalValueSummary; class Module; +class ModuleSummaryIndex; class MemProfContextDisambiguation : public PassInfoMixin { @@ -32,6 +35,10 @@ class MemProfContextDisambiguation MemProfContextDisambiguation() {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + void run(ModuleSummaryIndex &Index, + function_ref + isPrevailing); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h index 400b9faa94c1b..78761fc78fee8 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -332,7 +332,8 @@ class FunctionComparator { int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const; int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; int cmpAttrs(const AttributeList L, const AttributeList R) const; - int cmpMetadata(const MDNode *L, const MDNode *R) const; + int cmpMDNode(const MDNode *L, const MDNode *R) const; + int cmpMetadata(const Metadata *L, const Metadata *R) const; int cmpInstMetadata(Instruction const *L, Instruction const *R) const; int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index ecb0cdbd13c62..eaf0af92484d7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6391,10 +6391,13 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return nullptr; } -static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { - - unsigned NumOperands = Call->arg_size(); - Function *F = cast(Call->getCalledFunction()); +static Value *simplifyIntrinsic(CallBase *Call, Value *Callee, + 
ArrayRef Args, + const SimplifyQuery &Q) { + // Operand bundles should not be in Args. + assert(Call->arg_size() == Args.size()); + unsigned NumOperands = Args.size(); + Function *F = cast(Callee); Intrinsic::ID IID = F->getIntrinsicID(); // Most of the intrinsics with no operands have some kind of side effect. @@ -6420,18 +6423,17 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } if (NumOperands == 1) - return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q); + return simplifyUnaryIntrinsic(F, Args[0], Q); if (NumOperands == 2) - return simplifyBinaryIntrinsic(F, Call->getArgOperand(0), - Call->getArgOperand(1), Q); + return simplifyBinaryIntrinsic(F, Args[0], Args[1], Q); // Handle intrinsics with 3 or more arguments. switch (IID) { case Intrinsic::masked_load: case Intrinsic::masked_gather: { - Value *MaskArg = Call->getArgOperand(2); - Value *PassthruArg = Call->getArgOperand(3); + Value *MaskArg = Args[2]; + Value *PassthruArg = Args[3]; // If the mask is all zeros or undef, the "passthru" argument is the result. if (maskIsAllZeroOrUndef(MaskArg)) return PassthruArg; @@ -6439,8 +6441,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } case Intrinsic::fshl: case Intrinsic::fshr: { - Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1), - *ShAmtArg = Call->getArgOperand(2); + Value *Op0 = Args[0], *Op1 = Args[1], *ShAmtArg = Args[2]; // If both operands are undef, the result is undef. if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1)) @@ -6448,14 +6449,14 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { // If shift amount is undef, assume it is zero. if (Q.isUndefValue(ShAmtArg)) - return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1); + return Args[IID == Intrinsic::fshl ? 0 : 1]; const APInt *ShAmtC; if (match(ShAmtArg, m_APInt(ShAmtC))) { // If there's effectively no shift, return the 1st arg or 2nd arg. APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth()); if (ShAmtC->urem(BitWidth).isZero()) - return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1); + return Args[IID == Intrinsic::fshl ? 0 : 1]; } // Rotating zero by anything is zero. 
@@ -6469,31 +6470,24 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { return nullptr; } case Intrinsic::experimental_constrained_fma: { - Value *Op0 = Call->getArgOperand(0); - Value *Op1 = Call->getArgOperand(1); - Value *Op2 = Call->getArgOperand(2); auto *FPI = cast(Call); - if (Value *V = - simplifyFPOp({Op0, Op1, Op2}, {}, Q, *FPI->getExceptionBehavior(), - *FPI->getRoundingMode())) + if (Value *V = simplifyFPOp(Args, {}, Q, *FPI->getExceptionBehavior(), + *FPI->getRoundingMode())) return V; return nullptr; } case Intrinsic::fma: case Intrinsic::fmuladd: { - Value *Op0 = Call->getArgOperand(0); - Value *Op1 = Call->getArgOperand(1); - Value *Op2 = Call->getArgOperand(2); - if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q, fp::ebIgnore, + if (Value *V = simplifyFPOp(Args, {}, Q, fp::ebIgnore, RoundingMode::NearestTiesToEven)) return V; return nullptr; } case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: { - Value *Op0 = Call->getArgOperand(0); - Value *Op1 = Call->getArgOperand(1); - Value *Op2 = Call->getArgOperand(2); + Value *Op0 = Args[0]; + Value *Op1 = Args[1]; + Value *Op2 = Args[2]; Type *ReturnType = F->getReturnType(); // Canonicalize constant operand as Op1 (ConstantFolding handles the case @@ -6520,9 +6514,9 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { return nullptr; } case Intrinsic::vector_insert: { - Value *Vec = Call->getArgOperand(0); - Value *SubVec = Call->getArgOperand(1); - Value *Idx = Call->getArgOperand(2); + Value *Vec = Args[0]; + Value *SubVec = Args[1]; + Value *Idx = Args[2]; Type *ReturnType = F->getReturnType(); // (insert_vector Y, (extract_vector X, 0), 0) -> X @@ -6539,51 +6533,52 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } case Intrinsic::experimental_constrained_fadd: { auto *FPI = cast(Call); - return simplifyFAddInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFAddInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_fsub: { auto *FPI = cast(Call); - return simplifyFSubInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFSubInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_fmul: { auto *FPI = cast(Call); - return simplifyFMulInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFMulInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_fdiv: { auto *FPI = cast(Call); - return simplifyFDivInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFDivInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_frem: { auto *FPI = cast(Call); - return simplifyFRemInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFRemInst(Args[0], Args[1], 
FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } default: return nullptr; } } -static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { - auto *F = dyn_cast(Call->getCalledOperand()); +static Value *tryConstantFoldCall(CallBase *Call, Value *Callee, + ArrayRef Args, + const SimplifyQuery &Q) { + auto *F = dyn_cast(Callee); if (!F || !canConstantFoldCallTo(Call, F)) return nullptr; SmallVector ConstantArgs; - unsigned NumArgs = Call->arg_size(); - ConstantArgs.reserve(NumArgs); - for (auto &Arg : Call->args()) { - Constant *C = dyn_cast(&Arg); + ConstantArgs.reserve(Args.size()); + for (Value *Arg : Args) { + Constant *C = dyn_cast(Arg); if (!C) { - if (isa(Arg.get())) + if (isa(Arg)) continue; return nullptr; } @@ -6593,7 +6588,11 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } -Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { +Value *llvm::simplifyCall(CallBase *Call, Value *Callee, ArrayRef Args, + const SimplifyQuery &Q) { + // Args should not contain operand bundle operands. + assert(Call->arg_size() == Args.size()); + // musttail calls can only be simplified if they are also DCEd. // As we can't guarantee this here, don't simplify them. if (Call->isMustTailCall()) @@ -6601,16 +6600,15 @@ Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { // call undef -> poison // call null -> poison - Value *Callee = Call->getCalledOperand(); if (isa(Callee) || isa(Callee)) return PoisonValue::get(Call->getType()); - if (Value *V = tryConstantFoldCall(Call, Q)) + if (Value *V = tryConstantFoldCall(Call, Callee, Args, Q)) return V; auto *F = dyn_cast(Callee); if (F && F->isIntrinsic()) - if (Value *Ret = simplifyIntrinsic(Call, Q)) + if (Value *Ret = simplifyIntrinsic(Call, Callee, Args, Q)) return Ret; return nullptr; @@ -6618,9 +6616,10 @@ Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { Value *llvm::simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q) { assert(isa(Call)); - if (Value *V = tryConstantFoldCall(Call, Q)) + SmallVector Args(Call->args()); + if (Value *V = tryConstantFoldCall(Call, Call->getCalledOperand(), Args, Q)) return V; - if (Value *Ret = simplifyIntrinsic(Call, Q)) + if (Value *Ret = simplifyIntrinsic(Call, Call->getCalledOperand(), Args, Q)) return Ret; return nullptr; } @@ -6775,8 +6774,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I, case Instruction::PHI: return simplifyPHINode(cast(I), NewOps, Q); case Instruction::Call: - // TODO: Use NewOps - return simplifyCall(cast(I), Q); + return simplifyCall( + cast(I), NewOps.back(), + NewOps.drop_back(1 + cast(I)->getNumTotalBundleOperands()), Q); case Instruction::Freeze: return llvm::simplifyFreezeInst(NewOps[0], Q); #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index df525f4d6be7a..df872f61906c8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4988,6 +4988,18 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; + if (!AR->hasNoSelfWrap()) { + const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop()); + if (const SCEVConstant *BECountMax = dyn_cast(BECount)) { + ConstantRange StepCR = getSignedRange(AR->getStepRecurrence(*this)); + const APInt &BECountAP = 
BECountMax->getAPInt(); + unsigned NoOverflowBitWidth = + BECountAP.getActiveBits() + StepCR.getMinSignedBits(); + if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType())) + Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW); + } + } + if (!AR->hasNoSignedWrap()) { ConstantRange AddRecRange = getSignedRange(AR); ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this)); diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp index 035eea81378e5..eded892f358a8 100644 --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -28,9 +28,9 @@ static bool parseAssemblyInto(MemoryBufferRef F, Module *M, std::unique_ptr Buf = MemoryBuffer::getMemBuffer(F); SM.AddNewSourceBuffer(std::move(Buf), SMLoc()); - LLVMContext Context; + std::optional OptContext; return LLParser(F.getBuffer(), SM, Err, M, Index, - M ? M->getContext() : Context, Slots) + M ? M->getContext() : OptContext.emplace(), Slots) .Run(UpgradeDebugInfo, DataLayoutCallback); } diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 1146c1d465da5..5a005ba7b414d 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -157,7 +157,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, unsigned NumInstr = 0; // Number of instructions using LI SmallPtrSet Visited; - std::pair TargetHint = MRI.getRegAllocationHint(LI.reg()); + std::pair TargetHint = MRI.getRegAllocationHint(LI.reg()); if (LI.isSpillable()) { Register Reg = LI.reg(); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 8d11f282516c8..b571b5a8ab5f4 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2279,7 +2279,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { if (!Arg->getType()->isPointerTy()) continue; unsigned AS = Arg->getType()->getPointerAddressSpace(); - return optimizeMemoryInst(CI, Arg, Arg->getType(), AS); + if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS)) + return true; } IntrinsicInst *II = dyn_cast(CI); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 51d259cea41b8..5ef650787a585 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -103,6 +103,7 @@ CGOPT(bool, XRayOmitFunctionIndex) CGOPT(bool, DebugStrictDwarf) CGOPT(unsigned, AlignLoops) CGOPT(bool, JMCInstrument) +CGOPT(bool, XCOFFReadOnlyPointers) codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { #define CGBINDOPT(NAME) \ @@ -478,6 +479,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(JMCInstrument); + static cl::opt XCOFFReadOnlyPointers( + "mroptr", + cl::desc("When set to true, const objects with relocatable address " + "values are put into the RO data section."), + cl::init(false)); + CGBINDOPT(XCOFFReadOnlyPointers); + #undef CGBINDOPT mc::RegisterMCTargetOptionsFlags(); @@ -554,6 +562,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.DebugStrictDwarf = getDebugStrictDwarf(); Options.LoopAlignment = getAlignLoops(); Options.JMCInstrument = getJMCInstrument(); + Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers(); Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4a34d4724ae8f..20d7447802c8a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4105,7 +4105,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) { ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); // fold (subox, c) -> (addo x, -c) - if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) { + if (IsSigned && N1C && !N1C->isMinSignedValue()) { return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } @@ -4585,7 +4585,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DAG.getNegative(N0, DL, VT); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) - if (N1C && N1C->getAPIntValue().isMinSignedValue()) + if (N1C && N1C->isMinSignedValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); @@ -12210,7 +12210,8 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG) { + SelectionDAG &DAG, + CombineLevel Level) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -12235,10 +12236,14 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, else if (Opcode == ISD::ZERO_EXTEND) ExtLoadOpcode = ISD::ZEXTLOAD; + // Illegal VSELECT may ISel fail if happen after legalization (DAG + // Combine2), so we should conservatively check the OperationAction. LoadSDNode *Load1 = cast(Op1); LoadSDNode *Load2 = cast(Op2); if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) || - !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT())) + !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) || + (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes && + TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal)) return SDValue(); SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1); @@ -13106,7 +13111,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13457,7 +13462,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue V = widenAbs(N, DAG)) return V; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13618,7 +13623,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3511e76ac1df2..8199e5d5a9c18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3628,9 +3628,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { // We test only the i1 bit. Skip the AND if UNDEF or another AND. 
if (Tmp2.isUndef() || - (Tmp2.getOpcode() == ISD::AND && - isa(Tmp2.getOperand(1)) && - cast(Tmp2.getOperand(1))->getZExtValue() == 1)) + (Tmp2.getOpcode() == ISD::AND && isOneConstant(Tmp2.getOperand(1)))) Tmp3 = Tmp2; else Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f741ee4849dfc..c4f2fbc90e3eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3026,7 +3026,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (isOneConstant(LoOps[1])) Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, DAG.getConstant(0, dl, NVT), ISD::SETEQ); - else + else if (isAllOnesConstant(LoOps[1])) { + if (isAllOnesConstant(HiOps[1])) + Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0], + DAG.getConstant(0, dl, NVT), ISD::SETEQ); + else + Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0], + DAG.getConstant(0, dl, NVT), ISD::SETNE); + } else Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); @@ -3037,7 +3044,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + if (isAllOnesConstant(LoOps[1]) && isAllOnesConstant(HiOps[1])) + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps[0], Carry); + else + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c82f9ce64ea5a..b7b67a20bc9e9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3908,8 +3908,7 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { - assert(isConstOrConstSplat(N1C) && - isConstOrConstSplat(N1C)->getAPIntValue().isZero() && + assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() && "Should be a comparison with 0."); assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Valid only for [in]equality comparisons."); @@ -4738,8 +4737,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // For example, when high 32-bits of i64 X are known clear: // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 - bool CmpZero = N1C->getAPIntValue().isZero(); - bool CmpNegOne = N1C->getAPIntValue().isAllOnes(); + bool CmpZero = N1C->isZero(); + bool CmpNegOne = N1C->isAllOnes(); if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { // Match or(lo,shl(hi,bw/2)) pattern. 
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { @@ -4974,6 +4973,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0, N1, NewCond); } + // ~X > ~Y --> Y > X + // ~X < ~Y --> Y < X + // ~X < C --> X > ~C + // ~X > C --> X < ~C + if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) && + N0.getValueType().isInteger()) { + if (isBitwiseNot(N0)) { + if (isBitwiseNot(N1)) + return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond); + + if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + SDValue Not = DAG.getNOT(dl, N1, OpVT); + return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond); + } + } + } + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getValueType().isInteger()) { if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index aa92dcb386560..05ac176461a5c 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/StackProtector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/MemoryLocation.h" @@ -620,18 +621,19 @@ BasicBlock *StackProtector::CreateFailBB() { if (F->getSubprogram()) B.SetCurrentDebugLocation( DILocation::get(Context, 0, 0, F->getSubprogram())); + FunctionCallee StackChkFail; + SmallVector Args; if (Trip.isOSOpenBSD()) { - FunctionCallee StackChkFail = M->getOrInsertFunction( - "__stack_smash_handler", Type::getVoidTy(Context), - Type::getInt8PtrTy(Context)); - - B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); + StackChkFail = M->getOrInsertFunction("__stack_smash_handler", + Type::getVoidTy(Context), + Type::getInt8PtrTy(Context)); + Args.push_back(B.CreateGlobalStringPtr(F->getName(), "SSH")); } else { - FunctionCallee StackChkFail = + StackChkFail = M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context)); - - B.CreateCall(StackChkFail, {}); } + cast(StackChkFail.getCallee())->addFnAttr(Attribute::NoReturn); + B.CreateCall(StackChkFail, Args); B.CreateUnreachable(); return FailBB; } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index e2fbe027f15b0..c81b6bb623b96 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -2343,8 +2343,11 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( XCOFF::StorageMappingClass MappingClass; if (Kind.isText()) MappingClass = XCOFF::XMC_PR; - else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) + else if (Kind.isData() || Kind.isBSS()) MappingClass = XCOFF::XMC_RW; + else if (Kind.isReadOnlyWithRel()) + MappingClass = + TM.Options.XCOFFReadOnlyPointers ? XCOFF::XMC_RO : XCOFF::XMC_RW; else if (Kind.isReadOnly()) MappingClass = XCOFF::XMC_RO; else @@ -2429,9 +2432,18 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( return TextSection; } - // TODO: We may put Kind.isReadOnlyWithRel() under option control, because - // user may want to have read-only data with relocations placed into a - // read-only section by the compiler. 
+ if (TM.Options.XCOFFReadOnlyPointers && Kind.isReadOnlyWithRel()) { + if (!TM.getDataSections()) + report_fatal_error( + "ReadOnlyPointers is supported only if data sections is turned on"); + + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection( + Name, SectionKind::getReadOnly(), + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)); + } + // For BSS kind, zero initialized data must be emitted to the .data section // because external linkage control sections that get mapped to the .bss // section will be linked as tentative defintions, which is only appropriate diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e6baf00c06451..051de1612284c 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -424,8 +424,8 @@ bool TargetRegisterInfo::getRegAllocationHints( SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - const std::pair> &Hints_MRI = - MRI.getRegAllocationHints(VirtReg); + const std::pair> &Hints_MRI = + MRI.getRegAllocationHints(VirtReg); SmallSet HintedRegs; // First hint may be a target hint. diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index f80b06d7e9b7c..8e00712d2308e 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -116,10 +116,10 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) const { } bool VirtRegMap::hasKnownPreference(Register VirtReg) const { - std::pair Hint = MRI->getRegAllocationHint(VirtReg); - if (Register::isPhysicalRegister(Hint.second)) + std::pair Hint = MRI->getRegAllocationHint(VirtReg); + if (Hint.second.isPhysical()) return true; - if (Register::isVirtualRegister(Hint.second)) + if (Hint.second.isVirtual()) return hasPhys(Hint.second); return false; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 0725bd7744aea..dc46c76c06e86 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -1505,6 +1505,21 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) { return U; } +bool DWARFDebugLine::SectionParser::hasValidVersion(uint64_t Offset) { + DataExtractor::Cursor Cursor(Offset); + auto [TotalLength, _] = DebugLineData.getInitialLength(Cursor); + DWARFDataExtractor HeaderData(DebugLineData, Cursor.tell() + TotalLength); + uint16_t Version = HeaderData.getU16(Cursor); + if (!Cursor) { + // Ignore any error here. + // If this is not the end of the section parseNext() will still be + // attempted, where this error will occur again (and can be handled). + consumeError(Cursor.takeError()); + return false; + } + return versionIsSupported(Version); +} + void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, const Prologue &P) { // If the length field is not valid, we don't know where the next table is, so @@ -1518,5 +1533,29 @@ void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, Offset = OldOffset + P.TotalLength + P.sizeofTotalLength(); if (!DebugLineData.isValidOffset(Offset)) { Done = true; + return; + } + + // Heuristic: If the version is valid, then this is probably a line table. + // Otherwise, the offset might need alignment (to a 4 or 8 byte boundary). 
+ if (hasValidVersion(Offset)) + return; + + // ARM C/C++ Compiler aligns each line table to word boundaries and pads out + // the .debug_line section to a word multiple. Note that in the specification + // this does not seem forbidden since each unit has a DW_AT_stmt_list. + for (unsigned Align : {4, 8}) { + uint64_t AlignedOffset = alignTo(Offset, Align); + if (!DebugLineData.isValidOffset(AlignedOffset)) { + // This is almost certainly not another line table but some alignment + // padding. This assumes the alignments tested are ordered, and are + // smaller than the header size (which is true for 4 and 8). + Done = true; + return; + } + if (hasValidVersion(AlignedOffset)) { + Offset = AlignedOffset; + break; + } } } diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 26feb8ea3277b..9d2d4958dcf6c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -112,6 +112,17 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { Expected> getSymbolLinkageAndScope(const typename ELFT::Sym &Sym, StringRef Name); + /// Set the target flags on the given Symbol. + virtual TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) { + return TargetFlagsType{}; + } + + /// Get the physical offset of the symbol on the target platform. + virtual orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym, + TargetFlagsType Flags) { + return Sym.getValue(); + } + Error prepare(); Error graphifySections(); Error graphifySymbols(); @@ -478,6 +489,9 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { << "\"\n"; }); + TargetFlagsType Flags = makeTargetFlags(Sym); + orc::ExecutorAddrDiff Offset = getRawOffset(Sym, Flags); + // In RISCV, temporary symbols (Used to generate dwarf, eh_frame // sections...) will appear in object code's symbol table, and LLVM does // not use names on these temporary symbols (RISCV gnu toolchain uses @@ -485,10 +499,13 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { // anonymous symbol. auto &GSym = Name->empty() - ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, + ? G->addAnonymousSymbol(*B, Offset, Sym.st_size, false, false) - : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L, - S, Sym.getType() == ELF::STT_FUNC, false); + : G->addDefinedSymbol(*B, Offset, *Name, Sym.st_size, L, + S, Sym.getType() == ELF::STT_FUNC, + false); + + GSym.setTargetFlags(Flags); setGraphSymbol(SymIndex, GSym); } } else if (Sym.isUndefined() && Sym.isExternal()) { diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 82fa4bed914e6..9b6712818363e 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -1438,16 +1438,23 @@ void JITDylib::dump(raw_ostream &OS) { OS << "Link order: " << LinkOrder << "\n" << "Symbol table:\n"; - for (auto &KV : Symbols) { + // Sort symbols so we get a deterministic order and can check them in tests. 
+ std::vector> SymbolsSorted; + for (auto &KV : Symbols) + SymbolsSorted.emplace_back(KV.first, &KV.second); + std::sort(SymbolsSorted.begin(), SymbolsSorted.end(), + [](const auto &L, const auto &R) { return *L.first < *R.first; }); + + for (auto &KV : SymbolsSorted) { OS << " \"" << *KV.first << "\": "; - if (auto Addr = KV.second.getAddress()) + if (auto Addr = KV.second->getAddress()) OS << Addr; else OS << " "; - OS << " " << KV.second.getFlags() << " " << KV.second.getState(); + OS << " " << KV.second->getFlags() << " " << KV.second->getState(); - if (KV.second.hasMaterializerAttached()) { + if (KV.second->hasMaterializerAttached()) { OS << " (Materializer "; auto I = UnmaterializedInfos.find(KV.first); assert(I != UnmaterializedInfos.end() && diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index e7356e5d3151e..2c270cd66285d 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -39,6 +39,10 @@ bool hasInitializerSection(jitlink::LinkGraph &G) { return false; } +JITTargetAddress getJITSymbolPtrForSymbol(Symbol &Sym) { + return Sym.getAddress().getValue(); +} + JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { JITSymbolFlags Flags; @@ -215,10 +219,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto *Sym : G.defined_symbols()) if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); + auto Ptr = getJITSymbolPtrForSymbol(*Sym); auto Flags = getJITSymbolFlagsForSymbol(*Sym); - - InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); + InternedResult[InternedName] = JITEvaluatedSymbol(Ptr, Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -229,9 +232,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto *Sym : G.absolute_symbols()) if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); + auto Ptr = getJITSymbolPtrForSymbol(*Sym); auto Flags = getJITSymbolFlagsForSymbol(*Sym); - InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); + InternedResult[InternedName] = JITEvaluatedSymbol(Ptr, Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 81e2904bbd019..12c7c42ac5fe5 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3999,13 +3999,13 @@ Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn, } void OpenMPIRBuilder::emitTargetRegionFunction( - OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo, + TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID) { SmallString<64> EntryFnName; - InfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); + OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); OutlinedFn = Config.isEmbedded() || !Config.openMPOffloadMandatory() ? 
GenerateFunctionCallback(EntryFnName) @@ -4023,19 +4023,18 @@ void OpenMPIRBuilder::emitTargetRegionFunction( : createPlatformSpecificName({EntryFnName, "region_id"}); OutlinedFnID = registerTargetRegionFunction( - InfoManager, EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, - NumThreads); + EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, NumThreads); } Constant *OpenMPIRBuilder::registerTargetRegionFunction( - OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo, - Function *OutlinedFn, StringRef EntryFnName, StringRef EntryFnIDName, - int32_t NumTeams, int32_t NumThreads) { + TargetRegionEntryInfo &EntryInfo, Function *OutlinedFn, + StringRef EntryFnName, StringRef EntryFnIDName, int32_t NumTeams, + int32_t NumThreads) { if (OutlinedFn) setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads); auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName); auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName); - InfoManager.registerTargetRegionEntryInfo( + OffloadInfoManager.registerTargetRegionEntryInfo( EntryInfo, EntryAddr, OutlinedFnID, OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion); return OutlinedFnID; @@ -4897,18 +4896,17 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, // We only generate metadata for function that contain target regions. void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( - OffloadEntriesInfoManager &OffloadEntriesInfoManager, EmitMetadataErrorReportFunctionTy &ErrorFn) { // If there are no entries, we don't need to do anything. - if (OffloadEntriesInfoManager.empty()) + if (OffloadInfoManager.empty()) return; LLVMContext &C = M.getContext(); SmallVector, 16> - OrderedEntries(OffloadEntriesInfoManager.size()); + OrderedEntries(OffloadInfoManager.size()); // Auxiliary methods to create metadata values and strings. auto &&GetMDInt = [this](unsigned V) { @@ -4947,8 +4945,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( MD->addOperand(MDNode::get(C, Ops)); }; - OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( - TargetRegionMetadataEmitter); + OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter); // Create function that emits metadata for each device global variable entry; auto &&DeviceGlobalVarMetadataEmitter = @@ -4973,7 +4970,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( MD->addOperand(MDNode::get(C, Ops)); }; - OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo( DeviceGlobalVarMetadataEmitter); for (const auto &E : OrderedEntries) { @@ -5061,8 +5058,7 @@ void OffloadEntriesInfoManager::getTargetRegionEntryFnName( /// Loads all the offload entries information from the host IR /// metadata. -void OpenMPIRBuilder::loadOffloadInfoMetadata( - Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) { +void OpenMPIRBuilder::loadOffloadInfoMetadata(Module &M) { // If we are in target mode, load the metadata from the host IR. This code has // to match the metadata creation in createOffloadEntriesAndInfoMetadata(). 
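// ---------------------------------------------------------------------------
// Editor's sketch, not part of the patch: the OMPIRBuilder hunks in this file
// drop the OffloadEntriesInfoManager parameter in favour of a member
// (OffloadInfoManager), and the manager consults shared configuration through
// its owning builder (the OMPBuilder->Config.isEmbedded() checks). A minimal,
// self-contained model of that ownership shape; the Toy* names are
// hypothetical stand-ins, not the real LLVM classes.
#include <string>
#include <vector>

struct ToyConfig { bool IsEmbedded = false; };
struct ToyBuilder;

struct ToyEntriesManager {
  ToyBuilder *Owner; // back-pointer replaces a copied/threaded Config
  std::vector<std::string> Entries;
  explicit ToyEntriesManager(ToyBuilder *B) : Owner(B) {}
  void registerEntry(const std::string &Name);
};

struct ToyBuilder {
  ToyConfig Config;
  ToyEntriesManager OffloadInfo{this}; // member replaces the per-call argument
  void emitRegion(const std::string &Name) { OffloadInfo.registerEntry(Name); }
};

void ToyEntriesManager::registerEntry(const std::string &Name) {
  if (Owner->Config.IsEmbedded) // config reached via the owning builder
    return;                     // device side: entry was already initialized
  Entries.push_back(Name);
}
// ---------------------------------------------------------------------------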
@@ -5092,13 +5088,13 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata( /*FileID=*/GetMDInt(2), /*Line=*/GetMDInt(4), /*Count=*/GetMDInt(5)); - OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - EntryInfo, /*Order=*/GetMDInt(6)); + OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo, + /*Order=*/GetMDInt(6)); break; } case OffloadEntriesInfoManager::OffloadEntryInfo:: OffloadingEntryInfoDeviceGlobalVar: - OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( + OffloadInfoManager.initializeDeviceGlobalVarEntryInfo( /*MangledName=*/GetMDString(1), static_cast( /*Flags=*/GetMDInt(2)), @@ -5147,7 +5143,7 @@ void OffloadEntriesInfoManager::registerTargetRegionEntryInfo( // If we are emitting code for a target, the entry is already initialized, // only has to be registered. - if (Config.isEmbedded()) { + if (OMPBuilder->Config.isEmbedded()) { // This could happen if the device compilation is invoked standalone. if (!hasTargetRegionEntryInfo(EntryInfo)) { return; @@ -5202,7 +5198,7 @@ void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo( void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo( StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage) { - if (Config.isEmbedded()) { + if (OMPBuilder->Config.isEmbedded()) { // This could happen if the device compilation is invoked standalone. if (!hasDeviceGlobalVarEntryInfo(VarName)) return; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index ba68e6be05b52..a4b00d92ea89a 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -899,14 +899,6 @@ ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V, bool isSigned) { return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned)); } -ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) { - return get(Ty, V, true); -} - -Constant *ConstantInt::getSigned(Type *Ty, int64_t V) { - return get(Ty, V, true); -} - Constant *ConstantInt::get(Type *Ty, const APInt& V) { ConstantInt *C = get(Ty->getContext(), V); assert(C->getType() == Ty->getScalarType() && diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 38ab1d3d10244..2490b3012bdc2 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -336,12 +336,12 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) { } MDNode *MDBuilder::createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, - Function *F) { + StringRef FName) { auto *Int64Ty = Type::getInt64Ty(Context); SmallVector Ops(3); Ops[0] = createConstant(ConstantInt::get(Int64Ty, GUID)); Ops[1] = createConstant(ConstantInt::get(Int64Ty, Hash)); - Ops[2] = createString(F->getName()); + Ops[2] = createString(FName); return MDNode::get(Context, Ops); } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 1f273a8e5025f..ee6b8c3aa234d 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" @@ -75,6 +76,9 @@ cl::opt EnableLTOInternalization( cl::desc("Enable global value internalization in LTO")); } +/// Enable MemProf context disambiguation for thin link. 
+extern cl::opt EnableMemProfContextDisambiguation; + // Computes a unique hash for the Module considering the current list of // export/import and other global analysis results. // The hash is produced in \p Key. @@ -1539,6 +1543,14 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, LocalWPDTargetsMap); + auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { + return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + }; + if (EnableMemProfContextDisambiguation) { + MemProfContextDisambiguation ContextDisambiguation; + ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing); + } + if (Conf.OptLevel > 0) ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, ImportLists, ExportLists); @@ -1580,10 +1592,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported, LocalWPDTargetsMap); - auto isPrevailing = [&](GlobalValue::GUID GUID, - const GlobalValueSummary *S) { - return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); - }; thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported, isPrevailing); diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 006f697b61875..fa1ab2717af2e 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -194,7 +194,7 @@ class MCAsmStreamer final : public MCStreamer { void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override; - void emitXCOFFRefDirective(StringRef Name) override; + void emitXCOFFRefDirective(const MCSymbol *Symbol) override; void emitXCOFFExceptDirective(const MCSymbol *Symbol, const MCSymbol *Trap, @@ -943,8 +943,9 @@ void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, EmitEOL(); } -void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) { - OS << "\t.ref " << Name; +void MCAsmStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) { + OS << "\t.ref "; + Symbol->print(OS, MAI); EmitEOL(); } diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 517e258844156..4dd3163fd399d 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1190,7 +1190,7 @@ void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, "XCOFF targets"); } -void MCStreamer::emitXCOFFRefDirective(StringRef Name) { +void MCStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) { llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets"); } diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 25a678c68416e..d8ac07bc85b1b 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -81,6 +81,21 @@ void MCXCOFFStreamer::emitXCOFFSymbolLinkageWithVisibility( emitSymbolAttribute(Symbol, Visibility); } +void MCXCOFFStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) { + // Add a Fixup here to later record a relocation of type R_REF to prevent the + // ref symbol from being garbage collected (by the binder). 
+ MCDataFragment *DF = getOrCreateDataFragment(); + const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); + std::optional MaybeKind = + getAssembler().getBackend().getFixupKind("R_REF"); + if (!MaybeKind) + report_fatal_error("failed to get fixup kind for R_REF relocation"); + + MCFixupKind Kind = *MaybeKind; + MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, Kind); + DF->getFixups().push_back(Fixup); +} + void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol, const MCSymbol *Trap, unsigned Lang, unsigned Reason, diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index c79bdeb2cac4c..6452050d5941e 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -663,7 +663,10 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, // instr address plus any constant value. FixedValue = SectionMap[SymASec]->Address - BRInstrAddress + Target.getConstant(); - } + } else if (Type == XCOFF::RelocationType::R_REF) + // The FixedValue should always be 0 since it specifies a nonrelocating + // reference. + FixedValue = 0; assert((Fixup.getOffset() <= MaxRawDataSize - Layout.getFragmentOffset(Fragment)) && diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index 0d3aad658fe43..2d0f92e43a344 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" @@ -43,6 +44,10 @@ using namespace llvm; +struct SymMap { + std::map Map; +}; + NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} @@ -169,18 +174,21 @@ static bool isAIXBigArchive(object::Archive::Kind Kind) { return Kind == object::Archive::K_AIXBIG; } +static bool isCOFFArchive(object::Archive::Kind Kind) { + return Kind == object::Archive::K_COFF; +} + static bool isBSDLike(object::Archive::Kind Kind) { switch (Kind) { case object::Archive::K_GNU: case object::Archive::K_GNU64: case object::Archive::K_AIXBIG: + case object::Archive::K_COFF: return false; case object::Archive::K_BSD: case object::Archive::K_DARWIN: case object::Archive::K_DARWIN64: return true; - case object::Archive::K_COFF: - break; } llvm_unreachable("not supported for writting"); } @@ -191,6 +199,10 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { isBSDLike(Kind) ? 
support::little : support::big); } +template static void printLE(raw_ostream &Out, T Val) { + support::endian::write(Out, Val, support::little); +} + static void printRestOfMemberHeader( raw_ostream &Out, const sys::TimePoint &ModTime, unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { @@ -295,7 +307,11 @@ printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); if (Insertion.second) { Insertion.first->second = StringTable.tell(); - StringTable << M.MemberName << "/\n"; + StringTable << M.MemberName; + if (isCOFFArchive(Kind)) + StringTable << '\0'; + else + StringTable << "/\n"; } NamePos = Insertion.first->second; } @@ -356,7 +372,7 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, uint64_t NumSyms, uint64_t OffsetSize, - StringRef StringTable, + uint64_t StringTableSize, uint32_t *Padding = nullptr) { assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); uint64_t Size = OffsetSize; // Number of entries @@ -366,7 +382,7 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, Size += NumSyms * OffsetSize; // Table if (isBSDLike(Kind)) Size += OffsetSize; // byte count - Size += StringTable.size(); + Size += StringTableSize; // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. @@ -376,6 +392,22 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, uint32_t Pad = isAIXBigArchive(Kind) ? 0 : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); + + Size += Pad; + if (Padding) + *Padding = Pad; + return Size; +} + +static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap, + uint32_t *Padding = nullptr) { + uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries + Size += NumObj * sizeof(uint32_t); // Offset table + + for (auto S : SymMap.Map) + Size += sizeof(uint16_t) + S.first.length() + 1; + + uint32_t Pad = offsetToAlignment(Size, Align(2)); Size += Pad; if (Padding) *Padding = Pad; @@ -398,13 +430,35 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, } } +static uint64_t computeHeadersSize(object::Archive::Kind Kind, + uint64_t NumMembers, + uint64_t StringMemberSize, uint64_t NumSyms, + uint64_t SymNamesSize, SymMap *SymMap) { + uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4; + uint64_t SymtabSize = + computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize); + auto computeSymbolTableHeaderSize = [=] { + SmallString<0> TmpBuf; + raw_svector_ostream Tmp(TmpBuf); + writeSymbolTableHeader(Tmp, Kind, true, SymtabSize); + return TmpBuf.size(); + }; + uint32_t HeaderSize = computeSymbolTableHeaderSize(); + uint64_t Size = strlen("!\n") + HeaderSize + SymtabSize; + + if (SymMap) + Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap); + + return Size + StringMemberSize; +} + static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, bool Deterministic, ArrayRef Members, - StringRef StringTable, + StringRef StringTable, uint64_t MembersOffset, uint64_t PrevMemberOffset = 0) { // We don't write a symbol table on an archive with no members -- except on // Darwin, where the linker will abort unless the archive has a symbol table. 
- if (StringTable.empty() && !isDarwin(Kind)) + if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind)) return; unsigned NumSyms = 0; @@ -413,17 +467,16 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; uint32_t Pad; - uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); + uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, + StringTable.size(), &Pad); writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset); - uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) - : Out.tell() + Size; - if (isBSDLike(Kind)) printNBits(Out, Kind, NumSyms * 2 * OffsetSize); else printNBits(Out, Kind, NumSyms); + uint64_t Pos = MembersOffset; for (const MemberData &M : Members) { for (unsigned StringOffset : M.Symbols) { if (isBSDLike(Kind)) @@ -442,8 +495,35 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, Out.write(uint8_t(0)); } +static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, ArrayRef Members, + SymMap &SymMap, uint64_t MembersOffset) { + uint32_t Pad; + uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad); + writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0); + + uint32_t Pos = MembersOffset; + + printLE(Out, Members.size()); + for (const MemberData &M : Members) { + printLE(Out, Pos); // member offset + Pos += M.Header.size() + M.Data.size() + M.Padding.size(); + } + + printLE(Out, SymMap.Map.size()); + + for (auto S : SymMap.Map) + printLE(Out, S.second); + for (auto S : SymMap.Map) + Out << S.first << '\0'; + + while (Pad--) + Out.write(uint8_t(0)); +} + static Expected> -getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { +getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames, + SymMap *SymMap, bool &HasObject) { std::vector Ret; // In the scenario when LLVMContext is populated SymbolicFile will contain a @@ -472,10 +552,22 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { for (const object::BasicSymbolRef &S : Obj->symbols()) { if (!isArchiveSymbol(S)) continue; - Ret.push_back(SymNames.tell()); - if (Error E = S.printName(SymNames)) - return std::move(E); - SymNames << '\0'; + if (SymMap) { + std::string Name; + raw_string_ostream NameStream(Name); + if (Error E = S.printName(NameStream)) + return std::move(E); + if (SymMap->Map.find(Name) != SymMap->Map.end()) + continue; // ignore duplicated symbol + SymMap->Map[Name] = Index; + Ret.push_back(SymNames.tell()); + SymNames << Name << '\0'; + } else { + Ret.push_back(SymNames.tell()); + if (Error E = S.printName(SymNames)) + return std::move(E); + SymNames << '\0'; + } } return Ret; } @@ -483,7 +575,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { static Expected> computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, bool Deterministic, - bool NeedSymbols, ArrayRef NewMembers) { + bool NeedSymbols, SymMap *SymMap, + ArrayRef NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; uint64_t Pos = @@ -549,7 +642,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // The big archive format needs to know the offset of the previous member // header. 
- unsigned PrevOffset = 0; + unsigned PrevOffset = 0, Index = 0; for (const NewArchiveMember &M : NewMembers) { std::string Header; raw_string_ostream Out(Header); @@ -557,6 +650,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, MemoryBufferRef Buf = M.Buf->getMemBufferRef(); StringRef Data = Thin ? "" : Buf.getBuffer(); + Index++; + // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. This matches the behaviour with cctools and ensures that ld64 @@ -597,7 +692,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, std::vector Symbols; if (NeedSymbols) { Expected> SymbolsOrErr = - getSymbols(Buf, SymNames, HasObject); + getSymbols(Buf, Index, SymNames, SymMap, HasObject); if (!SymbolsOrErr) return createFileError(M.MemberName, SymbolsOrErr.takeError()); Symbols = std::move(*SymbolsOrErr); @@ -609,7 +704,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // If there are no symbols, emit an empty symbol table, to satisfy Solaris // tools, older versions of which expect a symbol table in a non-empty // archive, regardless of whether there are any symbols in it. - if (HasObject && SymNames.tell() == 0) + if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind)) SymNames << '\0' << '\0' << '\0'; return Ret; } @@ -667,21 +762,32 @@ static Error writeArchiveToStream(raw_ostream &Out, raw_svector_ostream SymNames(SymNamesBuf); SmallString<0> StringTableBuf; raw_svector_ostream StringTable(StringTableBuf); + SymMap SymMap; + + // COFF symbol map uses 16-bit indexes, so we can't use it if there are too + // many members. + if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) + Kind = object::Archive::K_GNU; - Expected> DataOrErr = - computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic, - WriteSymtab, NewMembers); + Expected> DataOrErr = computeMemberData( + StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, + isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers); if (Error E = DataOrErr.takeError()) return E; std::vector &Data = *DataOrErr; - if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) - Data.insert(Data.begin(), computeStringTable(StringTableBuf)); + uint64_t StringTableSize = 0; + MemberData StringTableMember; + if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) { + StringTableMember = computeStringTable(StringTableBuf); + StringTableSize = StringTableMember.Header.size() + + StringTableMember.Data.size() + + StringTableMember.Padding.size(); + } // We would like to detect if we need to switch to a 64-bit symbol table. - uint64_t LastMemberEndOffset = - isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 8; - uint64_t LastMemberHeaderOffset = LastMemberEndOffset; + uint64_t LastMemberEndOffset = 0; + uint64_t LastMemberHeaderOffset = 0; uint64_t NumSyms = 0; for (const auto &M : Data) { // Record the start of the member's offset @@ -691,19 +797,15 @@ static Error writeArchiveToStream(raw_ostream &Out, NumSyms += M.Symbols.size(); } + std::optional HeadersSize; + // The symbol table is put at the end of the big archive file. The symbol // table is at the start of the archive file for other archive formats. - if (WriteSymtab && !isAIXBigArchive(Kind)) { + if (WriteSymtab && !is64BitKind(Kind)) { // We assume 32-bit offsets to see if 32-bit symbols are possible or not. 
- uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); - auto computeSymbolTableHeaderSize = - [=] { - SmallString<0> TmpBuf; - raw_svector_ostream Tmp(TmpBuf); - writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); - return TmpBuf.size(); - }; - LastMemberHeaderOffset += computeSymbolTableHeaderSize() + SymtabSize; + HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize, + NumSyms, SymNamesBuf.size(), + isCOFFArchive(Kind) ? &SymMap : nullptr); // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by @@ -720,11 +822,12 @@ static Error writeArchiveToStream(raw_ostream &Out, // If LastMemberHeaderOffset isn't going to fit in a 32-bit variable we need // to switch to 64-bit. Note that the file can be larger than 4GB as long as // the last member starts before the 4GB offset. - if (LastMemberHeaderOffset >= Sym64Threshold) { + if (*HeadersSize + LastMemberHeaderOffset >= Sym64Threshold) { if (Kind == object::Archive::K_DARWIN) Kind = object::Archive::K_DARWIN64; else Kind = object::Archive::K_GNU64; + HeadersSize.reset(); } } @@ -736,11 +839,29 @@ static Error writeArchiveToStream(raw_ostream &Out, Out << "!\n"; if (!isAIXBigArchive(Kind)) { - if (WriteSymtab) - writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); + if (WriteSymtab) { + if (!HeadersSize) + HeadersSize = computeHeadersSize( + Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(), + isCOFFArchive(Kind) ? &SymMap : nullptr); + writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, + *HeadersSize); + + if (isCOFFArchive(Kind)) + writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize); + } + + if (StringTableSize) + Out << StringTableMember.Header << StringTableMember.Data + << StringTableMember.Padding; + for (const MemberData &M : Data) Out << M.Header << M.Data << M.Padding; } else { + HeadersSize = sizeof(object::BigArchive::FixLenHdr); + LastMemberEndOffset += *HeadersSize; + LastMemberHeaderOffset += *HeadersSize; + // For the big archive (AIX) format, compute a table of member names and // offsets, used in the member table. uint64_t MemberTableNameStrTblSize = 0; @@ -813,7 +934,7 @@ static Error writeArchiveToStream(raw_ostream &Out, if (WriteSymtab && NumSyms > 0) writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, - LastMemberEndOffset); + *HeadersSize, LastMemberEndOffset); } } Out.flush(); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 603b1f3d64737..35c249a7b3703 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -41,7 +41,7 @@ static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnh"; static const RISCVSupportedExtension SupportedExtensions[] = { {"i", RISCVExtensionVersion{2, 0}}, - {"e", RISCVExtensionVersion{1, 9}}, + {"e", RISCVExtensionVersion{2, 0}}, {"m", RISCVExtensionVersion{2, 0}}, {"a", RISCVExtensionVersion{2, 0}}, {"f", RISCVExtensionVersion{2, 0}}, @@ -584,8 +584,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, bool HasRV64 = Arch.startswith("rv64"); // ISA string must begin with rv32 or rv64. if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) { - return createStringError(errc::invalid_argument, - "string must begin with rv32{i,e,g} or rv64{i,g}"); + return createStringError( + errc::invalid_argument, + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } unsigned XLen = HasRV64 ?
64 : 32; @@ -601,14 +602,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, default: return createStringError(errc::invalid_argument, "first letter should be 'e', 'i' or 'g'"); - case 'e': { - // Extension 'e' is not allowed in rv64. - if (HasRV64) - return createStringError( - errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); - break; - } + case 'e': case 'i': break; case 'g': @@ -828,8 +822,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } Error RISCVISAInfo::checkDependency() { - bool IsRv32 = XLen == 32; - bool HasE = Exts.count("e") != 0; bool HasD = Exts.count("d") != 0; bool HasF = Exts.count("f") != 0; bool HasZfinx = Exts.count("zfinx") != 0; @@ -839,11 +831,6 @@ Error RISCVISAInfo::checkDependency() { bool HasZve64d = Exts.count("zve64d") != 0; bool HasZvl = MinVLen != 0; - if (HasE && !IsRv32) - return createStringError( - errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); - if (HasF && HasZfinx) return createStringError(errc::invalid_argument, "'f' and 'zfinx' extensions are incompatible"); @@ -1115,6 +1102,8 @@ StringRef RISCVISAInfo::computeDefaultABI() const { } else if (XLen == 64) { if (hasExtension("d")) return "lp64d"; + if (hasExtension("e")) + return "lp64e"; return "lp64"; } llvm_unreachable("Invalid XLEN"); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 858b352c8c72e..ba33e9cfe949c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3844,6 +3844,25 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; + + +// Aliases for existing SVE instructions for which predicate-as-counter are +// accepted as an operand to the instruction +def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]", + (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; +def : InstAlias<"ldr $Pt, [$Rn]", + (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>; + +def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]", + (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; +def : InstAlias<"str $Pt, [$Rn]", + (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>; + +def : InstAlias<"mov $Pd, $Pn", + (ORR_PPzPP PNR8:$Pd, PNR8:$Pn, PNR8:$Pn, PNR8:$Pn), 0>; + +def : InstAlias<"pfalse\t$Pd", (PFALSE PNR8:$Pd), 0>; + } // End HasSVE2p1_or_HasSME2 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index d5162d39ce43c..b0c554780edfd 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -7642,9 +7642,10 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; return Match_InvalidOperand; - // If the kind is a token for a literal immediate, check if our asm - // operand matches. This is for InstAliases which have a fixed-value - // immediate in the syntax. + // If the kind is a token for a literal immediate, check if our asm operand + // matches. This is for InstAliases which have a fixed-value immediate in + // the asm string, such as hints which are parsed into a specific + // instruction definition. 
#define MATCH_HASH(N) \ case MCK__HASH_##N: \ return MatchesOpImmediate(N); @@ -7654,10 +7655,20 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, MATCH_HASH(3) MATCH_HASH(4) MATCH_HASH(6) + MATCH_HASH(7) MATCH_HASH(8) + MATCH_HASH(10) MATCH_HASH(12) + MATCH_HASH(14) MATCH_HASH(16) MATCH_HASH(24) + MATCH_HASH(25) + MATCH_HASH(26) + MATCH_HASH(27) + MATCH_HASH(28) + MATCH_HASH(29) + MATCH_HASH(30) + MATCH_HASH(31) MATCH_HASH(32) MATCH_HASH(40) MATCH_HASH(48) diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1d3bf9150ca41..736d5b40ccb8d 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -728,8 +728,6 @@ class sve_int_pfalse opc, string asm> multiclass sve_int_pfalse opc, string asm> { def NAME : sve_int_pfalse; - def : InstAlias<"pfalse\t$Pd", (!cast(NAME) PNR8:$Pd), 0>; - def : Pat<(nxv16i1 immAllZerosV), (!cast(NAME))>; def : Pat<(nxv8i1 immAllZerosV), (!cast(NAME))>; def : Pat<(nxv4i1 immAllZerosV), (!cast(NAME))>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c55b2e4d8fa14..6214c3e935ec4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2770,7 +2770,7 @@ bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const { assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); unsigned Mods = SISrcMods::OP_SEL_1; - unsigned SrcSign = C->getAPIntValue().getZExtValue(); + unsigned SrcSign = C->getZExtValue(); if (SrcSign == 1) Mods ^= SISrcMods::NEG; @@ -2784,7 +2784,7 @@ bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In, assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); unsigned Mods = SISrcMods::OP_SEL_1; - unsigned SrcVal = C->getAPIntValue().getZExtValue(); + unsigned SrcVal = C->getZExtValue(); if (SrcVal == 1) Mods |= SISrcMods::OP_SEL_0; diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e6c6ab2efd50e..0fc2d8c6f5712 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -338,7 +338,7 @@ bool ARMBaseRegisterInfo::getRegAllocationHints( SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - std::pair Hint = MRI.getRegAllocationHint(VirtReg); + std::pair Hint = MRI.getRegAllocationHint(VirtReg); unsigned Odd; switch (Hint.first) { @@ -391,7 +391,7 @@ bool ARMBaseRegisterInfo::getRegAllocationHints( void ARMBaseRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg, MachineFunction &MF) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); - std::pair Hint = MRI->getRegAllocationHint(Reg); + std::pair Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == ARMRI::RegPairOdd || Hint.first == ARMRI::RegPairEven) && Hint.second.isVirtual()) { // If 'Reg' is one of the even / odd register pair and it's now changed diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index fbf688de637b5..efacc8b8f3c0e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2720,10 +2720,7 @@ void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, } auto OpIsZero = [N](size_t OpNo) { - if (ConstantSDNode *OpConst = dyn_cast(N->getOperand(OpNo))) - if 
(OpConst->getZExtValue() == 0) - return true; - return false; + return isNullConstant(N->getOperand(OpNo)); }; // If the input accumulator value is not zero, select an instruction with @@ -3990,10 +3987,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) { SDValue SmulLoHi = N->getOperand(1); SDValue Subc = N->getOperand(2); - auto *Zero = dyn_cast(Subc.getOperand(0)); + SDValue Zero = Subc.getOperand(0); - if (!Zero || Zero->getZExtValue() != 0 || - Subc.getOperand(1) != SmulLoHi.getValue(0) || + if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) || N->getOperand(1) != SmulLoHi.getValue(1) || N->getOperand(2) != Subc.getValue(1)) break; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 126bbc61a7d30..9c5f0df4d9468 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1007,6 +1007,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } + + for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, + MVT::v4i32}) { + setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + } } if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { @@ -10271,6 +10279,80 @@ static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, return LowerVecReduce(Op, DAG, ST); } +static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + if (!ST->hasNEON()) + return SDValue(); + + SDLoc dl(Op); + SDValue Op0 = Op->getOperand(0); + EVT VT = Op0.getValueType(); + EVT EltVT = VT.getVectorElementType(); + + unsigned PairwiseIntrinsic = 0; + switch (Op->getOpcode()) { + default: + llvm_unreachable("Expected VECREDUCE opcode"); + case ISD::VECREDUCE_UMIN: + PairwiseIntrinsic = Intrinsic::arm_neon_vpminu; + break; + case ISD::VECREDUCE_UMAX: + PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu; + break; + case ISD::VECREDUCE_SMIN: + PairwiseIntrinsic = Intrinsic::arm_neon_vpmins; + break; + case ISD::VECREDUCE_SMAX: + PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs; + break; + } + SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumActiveLanes = NumElts; + + assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 || + NumActiveLanes == 2) && + "Only expected a power 2 vector size"); + + // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors. + if (VT.is128BitVector()) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl); + VT = Lo.getValueType(); + Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi}); + NumActiveLanes /= 2; + } + + // Use pairwise reductions until one lane remains + while (NumActiveLanes > 1) { + Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0}); + NumActiveLanes /= 2; + } + + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getConstant(0, dl, MVT::i32)); + + // Result type may be wider than element type. 
+ if (EltVT != Op.getValueType()) { + unsigned Extend = 0; + switch (Op->getOpcode()) { + default: + llvm_unreachable("Expected VECREDUCE opcode"); + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_UMAX: + Extend = ISD::ZERO_EXTEND; + break; + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_SMAX: + Extend = ISD::SIGN_EXTEND; + break; + } + Res = DAG.getNode(Extend, dl, Op.getValueType(), Res); + } + return Res; +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (isStrongerThanMonotonic(cast(Op)->getSuccessOrdering())) // Acquire/Release load/store is not legal for targets without a dmb or @@ -10502,6 +10584,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAX: return LowerVecReduceF(Op, DAG, Subtarget); + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_SMAX: + return LowerVecReduceMinMax(Op, DAG, Subtarget); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 913724daf0ad6..c57825949c1ce 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -65,9 +65,8 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( break; case RTLIB::MEMSET: AEABILibcall = AEABI_MEMSET; - if (ConstantSDNode *ConstantSrc = dyn_cast(Src)) - if (ConstantSrc->getZExtValue() == 0) - AEABILibcall = AEABI_MEMCLR; + if (isNullConstant(Src)) + AEABILibcall = AEABI_MEMCLR; break; default: return SDValue(); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e0bc431fd4bd8..6cbb7120e2667 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -12887,71 +12887,41 @@ bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic, if (!hasMVE()) return false; - return Mnemonic.startswith("vabav") || Mnemonic.startswith("vaddv") || - Mnemonic.startswith("vaddlv") || Mnemonic.startswith("vminnmv") || - Mnemonic.startswith("vminnmav") || Mnemonic.startswith("vminv") || - Mnemonic.startswith("vminav") || Mnemonic.startswith("vmaxnmv") || - Mnemonic.startswith("vmaxnmav") || Mnemonic.startswith("vmaxv") || - Mnemonic.startswith("vmaxav") || Mnemonic.startswith("vmladav") || - Mnemonic.startswith("vrmlaldavh") || Mnemonic.startswith("vrmlalvh") || - Mnemonic.startswith("vmlsdav") || Mnemonic.startswith("vmlav") || - Mnemonic.startswith("vmlaldav") || Mnemonic.startswith("vmlalv") || - Mnemonic.startswith("vmaxnm") || Mnemonic.startswith("vminnm") || - Mnemonic.startswith("vmax") || Mnemonic.startswith("vmin") || - Mnemonic.startswith("vshlc") || Mnemonic.startswith("vmovlt") || - Mnemonic.startswith("vmovlb") || Mnemonic.startswith("vshll") || - Mnemonic.startswith("vrshrn") || Mnemonic.startswith("vshrn") || - Mnemonic.startswith("vqrshrun") || Mnemonic.startswith("vqshrun") || - Mnemonic.startswith("vqrshrn") || Mnemonic.startswith("vqshrn") || - Mnemonic.startswith("vbic") || Mnemonic.startswith("vrev64") || - Mnemonic.startswith("vrev32") || Mnemonic.startswith("vrev16") || - Mnemonic.startswith("vmvn") || Mnemonic.startswith("veor") || - Mnemonic.startswith("vorn") || Mnemonic.startswith("vorr") || - Mnemonic.startswith("vand") || Mnemonic.startswith("vmul") || - Mnemonic.startswith("vqrdmulh") || 
Mnemonic.startswith("vqdmulh") || - Mnemonic.startswith("vsub") || Mnemonic.startswith("vadd") || - Mnemonic.startswith("vqsub") || Mnemonic.startswith("vqadd") || - Mnemonic.startswith("vabd") || Mnemonic.startswith("vrhadd") || - Mnemonic.startswith("vhsub") || Mnemonic.startswith("vhadd") || - Mnemonic.startswith("vdup") || Mnemonic.startswith("vcls") || - Mnemonic.startswith("vclz") || Mnemonic.startswith("vneg") || - Mnemonic.startswith("vabs") || Mnemonic.startswith("vqneg") || - Mnemonic.startswith("vqabs") || - (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") || - Mnemonic.startswith("vcmla") || Mnemonic.startswith("vfma") || - Mnemonic.startswith("vfms") || Mnemonic.startswith("vcadd") || - Mnemonic.startswith("vadd") || Mnemonic.startswith("vsub") || - Mnemonic.startswith("vshl") || Mnemonic.startswith("vqshl") || - Mnemonic.startswith("vqrshl") || Mnemonic.startswith("vrshl") || - Mnemonic.startswith("vsri") || Mnemonic.startswith("vsli") || - Mnemonic.startswith("vrshr") || Mnemonic.startswith("vshr") || - Mnemonic.startswith("vpsel") || Mnemonic.startswith("vcmp") || - Mnemonic.startswith("vqdmladh") || Mnemonic.startswith("vqrdmladh") || - Mnemonic.startswith("vqdmlsdh") || Mnemonic.startswith("vqrdmlsdh") || - Mnemonic.startswith("vcmul") || Mnemonic.startswith("vrmulh") || - Mnemonic.startswith("vqmovn") || Mnemonic.startswith("vqmovun") || - Mnemonic.startswith("vmovnt") || Mnemonic.startswith("vmovnb") || - Mnemonic.startswith("vmaxa") || Mnemonic.startswith("vmaxnma") || - Mnemonic.startswith("vhcadd") || Mnemonic.startswith("vadc") || - Mnemonic.startswith("vsbc") || Mnemonic.startswith("vrshr") || - Mnemonic.startswith("vshr") || Mnemonic.startswith("vstrb") || - Mnemonic.startswith("vldrb") || - (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi") || - (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") || - Mnemonic.startswith("vstrw") || Mnemonic.startswith("vldrw") || - Mnemonic.startswith("vldrd") || Mnemonic.startswith("vstrd") || - Mnemonic.startswith("vqdmull") || Mnemonic.startswith("vbrsr") || - Mnemonic.startswith("vfmas") || Mnemonic.startswith("vmlas") || - Mnemonic.startswith("vmla") || Mnemonic.startswith("vqdmlash") || - Mnemonic.startswith("vqdmlah") || Mnemonic.startswith("vqrdmlash") || - Mnemonic.startswith("vqrdmlah") || Mnemonic.startswith("viwdup") || - Mnemonic.startswith("vdwdup") || Mnemonic.startswith("vidup") || - Mnemonic.startswith("vddup") || Mnemonic.startswith("vctp") || - Mnemonic.startswith("vpnot") || Mnemonic.startswith("vbic") || - Mnemonic.startswith("vrmlsldavh") || Mnemonic.startswith("vmlsldav") || - Mnemonic.startswith("vcvt") || - MS.isVPTPredicableCDEInstr(Mnemonic) || - (Mnemonic.startswith("vmov") && - !(ExtraToken == ".f16" || ExtraToken == ".32" || - ExtraToken == ".16" || ExtraToken == ".8")); + if (MS.isVPTPredicableCDEInstr(Mnemonic) || + (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") || + (Mnemonic.startswith("vmov") && + !(ExtraToken == ".f16" || ExtraToken == ".32" || ExtraToken == ".16" || + ExtraToken == ".8")) || + (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") || + (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi")) + return true; + + const char *predicable_prefixes[] = { + "vabav", "vabd", "vabs", "vadc", "vadd", + "vaddlv", "vaddv", "vand", "vbic", "vbrsr", + "vcadd", "vcls", "vclz", "vcmla", "vcmp", + "vcmul", "vctp", "vcvt", "vddup", "vdup", + "vdwdup", "veor", "vfma", "vfmas", "vfms", + "vhadd", "vhcadd", "vhsub", "vidup", "viwdup", + "vldrb", "vldrd", "vldrw", "vmax", 
"vmaxa", + "vmaxav", "vmaxnm", "vmaxnma", "vmaxnmav", "vmaxnmv", + "vmaxv", "vmin", "vminav", "vminnm", "vminnmav", + "vminnmv", "vminv", "vmla", "vmladav", "vmlaldav", + "vmlalv", "vmlas", "vmlav", "vmlsdav", "vmlsldav", + "vmovlb", "vmovlt", "vmovnb", "vmovnt", "vmul", + "vmvn", "vneg", "vorn", "vorr", "vpnot", + "vpsel", "vqabs", "vqadd", "vqdmladh", "vqdmlah", + "vqdmlash", "vqdmlsdh", "vqdmulh", "vqdmull", "vqmovn", + "vqmovun", "vqneg", "vqrdmladh", "vqrdmlah", "vqrdmlash", + "vqrdmlsdh", "vqrdmulh", "vqrshl", "vqrshrn", "vqrshrun", + "vqshl", "vqshrn", "vqshrun", "vqsub", "vrev16", + "vrev32", "vrev64", "vrhadd", "vrmlaldavh", "vrmlalvh", + "vrmlsldavh", "vrmulh", "vrshl", "vrshr", "vrshrn", + "vsbc", "vshl", "vshlc", "vshll", "vshr", + "vshrn", "vsli", "vsri", "vstrb", "vstrd", + "vstrw", "vsub"}; + + return std::any_of( + std::begin(predicable_prefixes), std::end(predicable_prefixes), + [&Mnemonic](const char *prefix) { return Mnemonic.startswith(prefix); }); } diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp index 03015a457a0d1..6ea8e200bd4e9 100644 --- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -275,8 +275,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand( } if (ImmNode->getValueType(0) != MVT::i8) { - Disp = CurDAG->getTargetConstant( - ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8); + Disp = CurDAG->getTargetConstant(ImmNode->getZExtValue(), dl, MVT::i8); } else { Disp = ImmOp; } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 933ba3b40ce40..504019c2a09e8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" #include using namespace llvm; @@ -34,6 +35,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { initializeLoongArchDAGToDAGISelPass(*PR); } +static cl::opt + EnableLoopDataPrefetch("loongarch-enable-loop-data-prefetch", cl::Hidden, + cl::desc("Enable the loop data prefetch pass"), + cl::init(false)); + static std::string computeDataLayout(const Triple &TT) { if (TT.isArch64Bit()) return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; @@ -126,6 +132,12 @@ LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { } void LoongArchPassConfig::addIRPasses() { + // Run LoopDataPrefetch + // + // Run this before LSR to remove the multiplies involved in computing the + // pointer values N iterations ahead. 
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 95184420f6087..521a7843b1142 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -38,7 +38,7 @@ enum CondCodes { FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); ModulePass *createNVPTXAssignValidGlobalNamesPass(); -ModulePass *createGenericToNVVMPass(); +ModulePass *createGenericToNVVMLegacyPass(); FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion); FunctionPass *createNVVMReflectPass(unsigned int SmVersion); MachineFunctionPass *createNVPTXPrologEpilogPass(); @@ -67,6 +67,10 @@ struct NVVMReflectPass : PassInfoMixin { unsigned SmVersion; }; +struct GenericToNVVMPass : PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + namespace NVPTX { enum DrvInterface { NVCL, diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index d892023c6cb7f..4f03e474edb47 100644 --- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -29,19 +29,13 @@ using namespace llvm; namespace llvm { -void initializeGenericToNVVMPass(PassRegistry &); +void initializeGenericToNVVMLegacyPassPass(PassRegistry &); } namespace { -class GenericToNVVM : public ModulePass { +class GenericToNVVM { public: - static char ID; - - GenericToNVVM() : ModulePass(ID) {} - - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override {} + bool runOnModule(Module &M); private: Value *remapConstant(Module *M, Function *F, Constant *C, @@ -59,15 +53,6 @@ class GenericToNVVM : public ModulePass { }; } // end namespace -char GenericToNVVM::ID = 0; - -ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); } - -INITIALIZE_PASS( - GenericToNVVM, "generic-to-nvvm", - "Ensure that the global variables are in the global address space", false, - false) - bool GenericToNVVM::runOnModule(Module &M) { // Create a clone of each global variable that has the default address space. // The clone is created with the global address space specifier, and the pair @@ -293,3 +278,34 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr"); } } + +namespace { +class GenericToNVVMLegacyPass : public ModulePass { +public: + static char ID; + + GenericToNVVMLegacyPass() : ModulePass(ID) {} + + bool runOnModule(Module &M) override; +}; +} // namespace + +char GenericToNVVMLegacyPass::ID = 0; + +ModulePass *llvm::createGenericToNVVMLegacyPass() { + return new GenericToNVVMLegacyPass(); +} + +INITIALIZE_PASS( + GenericToNVVMLegacyPass, "generic-to-nvvm", + "Ensure that the global variables are in the global address space", false, + false) + +bool GenericToNVVMLegacyPass::runOnModule(Module &M) { + return GenericToNVVM().runOnModule(M); +} + +PreservedAnalyses GenericToNVVMPass::run(Module &M, ModuleAnalysisManager &AM) { + return GenericToNVVM().runOnModule(M) ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 5ff9b4df6d7f9..3d1e4fcde90aa 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -64,7 +64,7 @@ static cl::opt UseShortPointersOpt( namespace llvm { -void initializeGenericToNVVMPass(PassRegistry&); +void initializeGenericToNVVMLegacyPassPass(PassRegistry &); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXAtomicLowerPass(PassRegistry &); @@ -89,7 +89,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() { // but it's very NVPTX-specific. initializeNVVMReflectPass(PR); initializeNVVMIntrRangePass(PR); - initializeGenericToNVVMPass(PR); + initializeGenericToNVVMLegacyPassPass(PR); initializeNVPTXAllocaHoistingPass(PR); initializeNVPTXAssignValidGlobalNamesPass(PR); initializeNVPTXAtomicLowerPass(PR); @@ -246,6 +246,16 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { return false; }); + PB.registerPipelineParsingCallback( + [](StringRef PassName, ModulePassManager &PM, + ArrayRef) { + if (PassName == "generic-to-nvvm") { + PM.addPass(GenericToNVVMPass()); + return true; + } + return false; + }); + PB.registerPipelineStartEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { FunctionPassManager FPM; @@ -348,7 +358,7 @@ void NVPTXPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOpt::None) addPass(createNVPTXImageOptimizerPass()); addPass(createNVPTXAssignValidGlobalNamesPass()); - addPass(createGenericToNVVMPass()); + addPass(createGenericToNVVMLegacyPass()); // NVPTXLowerArgs is required for correctness and should be run right // before the address space inference passes. diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 227bd59ba3a64..a814bb1b4c07e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -238,6 +238,8 @@ class XCOFFPPCAsmBackend : public PPCAsmBackend { createObjectTargetWriter() const override { return createPPCXCOFFObjectWriter(TT.isArch64Bit()); } + + std::optional getFixupKind(StringRef Name) const override; }; } // end anonymous namespace @@ -272,6 +274,13 @@ ELFPPCAsmBackend::getFixupKind(StringRef Name) const { return std::nullopt; } +std::optional +XCOFFPPCAsmBackend::getFixupKind(StringRef Name) const { + return StringSwitch>(Name) + .Case("R_REF", (MCFixupKind)PPC::fixup_ppc_nofixup) + .Default(std::nullopt); +} + MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index df0c666f5b113..9e8ee9f23107b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -48,7 +48,8 @@ enum Fixups { /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the /// TLS general and local dynamic models, or inserts the thread-pointer - /// register number. + /// register number. It can also be used to tie the ref symbol to prevent it + /// from being garbage collected on AIX. 
fixup_ppc_nofixup, /// A 16-bit fixup corresponding to lo16(_foo) with implied 3 zero bits for diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index 729cb35cbebcf..b6e749b781804 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -90,6 +90,12 @@ std::pair PPCXCOFFObjectWriter::getRelocTypeAndSignSize( return {XCOFF::RelocationType::R_RBR, EncodedSignednessIndicator | 25}; case PPC::fixup_ppc_br24abs: return {XCOFF::RelocationType::R_RBA, EncodedSignednessIndicator | 25}; + case PPC::fixup_ppc_nofixup: { + if (Modifier == MCSymbolRefExpr::VK_None) + return {XCOFF::RelocationType::R_REF, 0}; + else + llvm_unreachable("Unsupported Modifier"); + } break; case FK_Data_4: case FK_Data_8: const uint8_t SignAndSizeForFKData = diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1ecaeabacf9f7..7c6fd3b85b1eb 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2517,16 +2517,22 @@ void PPCAIXAsmPrinter::emitPGORefs() { OutStreamer->switchSection(CntsSection); if (OutContext.hasXCOFFSection( "__llvm_prf_data", - XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) - OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]"); + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_data[RW]"); + OutStreamer->emitXCOFFRefDirective(S); + } if (OutContext.hasXCOFFSection( "__llvm_prf_names", - XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) - OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]"); + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) { + MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_names[RO]"); + OutStreamer->emitXCOFFRefDirective(S); + } if (OutContext.hasXCOFFSection( "__llvm_prf_vnds", - XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) - OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]"); + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_vnds[RW]"); + OutStreamer->emitXCOFFRefDirective(S); + } } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 03a387570e3c6..7670d4d41cd86 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18149,7 +18149,7 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent, if (Flags & PPC::MOF_RPlusSImm16) { SDValue Op0 = N.getOperand(0); SDValue Op1 = N.getOperand(1); - int16_t Imm = cast(Op1)->getAPIntValue().getZExtValue(); + int16_t Imm = cast(Op1)->getZExtValue(); if (!Align || isAligned(*Align, Imm)) { Disp = DAG.getTargetConstant(Imm, DL, N.getValueType()); Base = Op0; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 9c6d54e62b16c..1627761052284 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -67,7 +67,7 @@ class RISCVAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); } - bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); } + bool isRVE() const { return 
getSTI().hasFeature(RISCV::FeatureRVE); } RISCVTargetStreamer &getTargetStreamer() { assert(getParser().getStreamer().getTargetStreamer() && @@ -368,6 +368,12 @@ struct RISCVOperand final : public MCParsedAsmOperand { bool isV0Reg() const { return Kind == KindTy::Register && Reg.RegNum == RISCV::V0; } + bool isAnyReg() const { + return Kind == KindTy::Register && + (RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum) || + RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg.RegNum) || + RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg.RegNum)); + } bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } bool isSystemRegister() const { return Kind == KindTy::SystemRegister; } @@ -1346,9 +1352,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Attempts to match Name as a register (either using the default name or // alternative ABI names), setting RegNo to the matching register. Upon -// failure, returns true and sets RegNo to 0. If IsRV32E then registers +// failure, returns true and sets RegNo to 0. If IsRVE then registers // x16-x31 will be rejected. -static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo, +static bool matchRegisterNameHelper(bool IsRVE, MCRegister &RegNo, StringRef Name) { RegNo = MatchRegisterName(Name); // The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial @@ -1360,7 +1366,7 @@ static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo, static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated"); if (RegNo == RISCV::NoRegister) RegNo = MatchRegisterAltName(Name); - if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31) + if (IsRVE && RegNo >= RISCV::X16 && RegNo <= RISCV::X31) RegNo = RISCV::NoRegister; return RegNo == RISCV::NoRegister; } @@ -1381,7 +1387,7 @@ OperandMatchResultTy RISCVAsmParser::tryParseRegister(MCRegister &RegNo, RegNo = 0; StringRef Name = getLexer().getTok().getIdentifier(); - if (matchRegisterNameHelper(isRV32E(), (MCRegister &)RegNo, Name)) + if (matchRegisterNameHelper(isRVE(), (MCRegister &)RegNo, Name)) return MatchOperand_NoMatch; getParser().Lex(); // Eat identifier token. 
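The parser, disassembler, and ABI changes in this area all enforce the same rule: with the E base ISA only x0 through x15 exist, and the restriction is now tied to the 'e' feature rather than to RV32 specifically. A standalone sketch of the check (a hypothetical helper, not code from this patch):

    #include <cstdint>

    // The E variants halve the integer register file: encodings 16-31 are
    // invalid regardless of XLEN (RV32E or RV64E). Mirrors the checks in
    // matchRegisterNameHelper and DecodeGPRRegisterClass.
    static bool isValidGPREncodingForRVE(uint32_t RegEncoding, bool IsRVE) {
      if (RegEncoding >= 32)
        return false;                        // not a GPR encoding at all
      return !(IsRVE && RegEncoding >= 16);  // x16-x31 rejected when 'e' is set
    }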
@@ -1414,7 +1420,7 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, case AsmToken::Identifier: StringRef Name = getLexer().getTok().getIdentifier(); MCRegister RegNo; - matchRegisterNameHelper(isRV32E(), RegNo, Name); + matchRegisterNameHelper(isRVE(), RegNo, Name); if (RegNo == RISCV::NoRegister) { if (HadParens) @@ -1902,7 +1908,7 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) { return MatchOperand_ParseFail; } MCRegister RegNo; - matchRegisterNameHelper(isRV32E(), RegNo, Name); + matchRegisterNameHelper(isRVE(), RegNo, Name); if (RegNo == RISCV::NoRegister) return MatchOperand_NoMatch; @@ -1921,7 +1927,7 @@ OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { StringRef Name = getLexer().getTok().getIdentifier(); MCRegister RegNo; - matchRegisterNameHelper(isRV32E(), RegNo, Name); + matchRegisterNameHelper(isRVE(), RegNo, Name); if (RegNo == RISCV::NoRegister) return MatchOperand_NoMatch; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 15352c1c0885d..2d01d6df3a198 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -61,9 +61,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() { static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - bool IsRV32E = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureRV32E); + bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureRVE); - if (RegNo >= 32 || (IsRV32E && RegNo >= 16)) + if (RegNo >= 32 || (IsRVE && RegNo >= 16)) return MCDisassembler::Fail; MCRegister Reg = RISCV::X0 + RegNo; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 98c8e883e5960..8f891a04def53 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -40,7 +40,7 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName) { auto TargetABI = getTargetABI(ABIName); bool IsRV64 = TT.isArch64Bit(); - bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; + bool IsRVE = FeatureBits[RISCV::FeatureRVE]; if (!ABIName.empty() && TargetABI == ABI_Unknown) { errs() @@ -54,11 +54,18 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring " "target-abi)\n"; TargetABI = ABI_Unknown; - } else if (IsRV32E && TargetABI != ABI_ILP32E && TargetABI != ABI_Unknown) { + } else if (!IsRV64 && IsRVE && TargetABI != ABI_ILP32E && + TargetABI != ABI_Unknown) { // TODO: move this checking to RISCVTargetLowering and RISCVAsmParser errs() << "Only the ilp32e ABI is supported for RV32E (ignoring target-abi)\n"; TargetABI = ABI_Unknown; + } else if (IsRV64 && IsRVE && TargetABI != ABI_LP64E && + TargetABI != ABI_Unknown) { + // TODO: move this checking to RISCVTargetLowering and RISCVAsmParser + errs() + << "Only the lp64e ABI is supported for RV64E (ignoring target-abi)\n"; + TargetABI = ABI_Unknown; } if (TargetABI != ABI_Unknown) @@ -80,6 +87,7 @@ ABI getTargetABI(StringRef ABIName) { .Case("lp64", ABI_LP64) .Case("lp64f", ABI_LP64F) .Case("lp64d", ABI_LP64D) + .Case("lp64e", ABI_LP64E) .Default(ABI_Unknown); return TargetABI; } @@ -101,8 +109,6 @@ void validate(const Triple &TT, const FeatureBitset 
&FeatureBits) { report_fatal_error("RV64 target requires an RV64 CPU"); if (!TT.isArch64Bit() && !FeatureBits[RISCV::Feature32Bit]) report_fatal_error("RV32 target requires an RV32 CPU"); - if (TT.isArch64Bit() && FeatureBits[RISCV::FeatureRV32E]) - report_fatal_error("RV32E can't be enabled for an RV64 target"); if (FeatureBits[RISCV::Feature32Bit] && FeatureBits[RISCV::Feature64Bit]) report_fatal_error("RV32 and RV64 can't be combined"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 70fdc0e4ff120..175059fdf08e5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -408,6 +408,7 @@ enum ABI { ABI_LP64, ABI_LP64F, ABI_LP64D, + ABI_LP64E, ABI_Unknown }; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index a05254b0ae579..356bb895c6ed4 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -103,6 +103,7 @@ void RISCVTargetELFStreamer::finish() { EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE; break; case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: EFlags |= ELF::EF_RISCV_RVE; break; case RISCVABI::ABI_Unknown: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 756cc14a87014..f7bcc197b1872 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -47,10 +47,10 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) { } void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { - if (STI.hasFeature(RISCV::FeatureRV32E)) - emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_4); - else - emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); + if (STI.hasFeature(RISCV::FeatureRVE)) + report_fatal_error("Codegen not yet implemented for RVE"); + + emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); auto ParseResult = RISCVFeatures::parseFeatureBits( STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits()); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index b0bb2992f6b42..0cf73bc37be84 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -589,11 +589,11 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">, defvar RV32 = DefaultMode; def RV64 : HwMode<"+64bit", [IsRV64]>; -def FeatureRV32E - : SubtargetFeature<"e", "IsRV32E", "true", - "Implements RV32E (provides 16 rather than 32 GPRs)">; -def IsRV32E : Predicate<"Subtarget->isRV32E()">, - AssemblerPredicate<(all_of FeatureRV32E)>; +def FeatureRVE + : SubtargetFeature<"e", "IsRVE", "true", + "Implements RV{32,64}E (provides 16 rather than 32 GPRs)">; +def IsRVE : Predicate<"Subtarget->isRVE()">, + AssemblerPredicate<(all_of FeatureRVE)>; def FeatureRelax : SubtargetFeature<"relax", "EnableLinkerRelax", "true", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 595e094662f9a..9310c8161cd46 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -76,8 +76,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRV32E()) - 
report_fatal_error("Codegen not yet implemented for RV32E"); + if (Subtarget.isRVE()) + report_fatal_error("Codegen not yet implemented for RVE"); RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ab8a8a4cc9935..85c3082dce64f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1090,6 +1090,17 @@ def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>; // .insn directive instructions //===----------------------------------------------------------------------===// +def AnyRegOperand : AsmOperandClass { + let Name = "AnyRegOperand"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isAnyReg"; +} + +def AnyReg : Operand { + let OperandType = "OPERAND_REGISTER"; + let ParserMatchClass = AnyRegOperand; +} + // isCodeGenOnly = 1 to hide them from the tablegened assembly parser. let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1, hasNoSchedulingInfo = 1 in { diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp index 281918259cdb3..6b658539a319b 100644 --- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -193,6 +193,19 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, } else if (RISCV::FPR64RegClass.contains(Reg)) { Reg = TRI->getSubReg(Reg, RISCV::sub_32); assert(Reg && "Superregister does not exist"); + } else if (RISCV::VRN2M1RegClass.contains(Reg) || + RISCV::VRN2M2RegClass.contains(Reg) || + RISCV::VRN2M4RegClass.contains(Reg) || + RISCV::VRN3M1RegClass.contains(Reg) || + RISCV::VRN3M2RegClass.contains(Reg) || + RISCV::VRN4M1RegClass.contains(Reg) || + RISCV::VRN4M2RegClass.contains(Reg) || + RISCV::VRN5M1RegClass.contains(Reg) || + RISCV::VRN6M1RegClass.contains(Reg) || + RISCV::VRN7M1RegClass.contains(Reg) || + RISCV::VRN8M1RegClass.contains(Reg)) { + Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0); + assert(Reg && "Subregister does not exist"); } MCOp = MCOperand::createReg(Reg); diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 301f2ad77d00c..d06453c82739e 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -45,6 +45,7 @@ class RISCVReg64 let SubRegIndices = [sub_32]; } +let FallbackRegAltNameIndex = NoRegAltName in def ABIRegAltName : RegAltNameIndex; def sub_vrm4_0 : SubRegIndex<256>; @@ -415,51 +416,46 @@ class VRegList LIn, int start, int nf, int lmul, bit isV0> { } // Vector registers -let RegAltNameIndices = [ABIRegAltName] in { - foreach Index = 0-31 in { - def V#Index : RISCVReg, DwarfRegNum<[!add(Index, 96)]>; - } +foreach Index = 0-31 in { + def V#Index : RISCVReg, DwarfRegNum<[!add(Index, 96)]>; +} - foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, - 24, 26, 28, 30] in { - def V#Index#M2 : RISCVRegWithSubRegs("V"#Index), - !cast("V"#!add(Index, 1))], - ["v"#Index]>, - DwarfRegAlias("V"#Index)> { - let SubRegIndices = [sub_vrm1_0, sub_vrm1_1]; - } +foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, + 24, 26, 28, 30] in { + def V#Index#M2 : RISCVRegWithSubRegs("V"#Index), + !cast("V"#!add(Index, 1))]>, + DwarfRegAlias("V"#Index)> { + let SubRegIndices = [sub_vrm1_0, sub_vrm1_1]; } +} - foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in { - def V#Index#M4 : 
RISCVRegWithSubRegs("V"#Index#"M2"), - !cast("V"#!add(Index, 2)#"M2")], - ["v"#Index]>, - DwarfRegAlias("V"#Index)> { - let SubRegIndices = [sub_vrm2_0, sub_vrm2_1]; - } +foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in { + def V#Index#M4 : RISCVRegWithSubRegs("V"#Index#"M2"), + !cast("V"#!add(Index, 2)#"M2")]>, + DwarfRegAlias("V"#Index)> { + let SubRegIndices = [sub_vrm2_0, sub_vrm2_1]; } +} - foreach Index = [0, 8, 16, 24] in { - def V#Index#M8 : RISCVRegWithSubRegs("V"#Index#"M4"), - !cast("V"#!add(Index, 4)#"M4")], - ["v"#Index]>, - DwarfRegAlias("V"#Index)> { - let SubRegIndices = [sub_vrm4_0, sub_vrm4_1]; - } +foreach Index = [0, 8, 16, 24] in { + def V#Index#M8 : RISCVRegWithSubRegs("V"#Index#"M4"), + !cast("V"#!add(Index, 4)#"M4")]>, + DwarfRegAlias("V"#Index)> { + let SubRegIndices = [sub_vrm4_0, sub_vrm4_1]; } - - def VTYPE : RISCVReg<0, "vtype", ["vtype"]>; - def VL : RISCVReg<0, "vl", ["vl"]>; - def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>; - def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>; - let isConstant = true in - def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>, - DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; } +def VTYPE : RISCVReg<0, "vtype">; +def VL : RISCVReg<0, "vl">; +def VXSAT : RISCVReg<0, "vxsat">; +def VXRM : RISCVReg<0, "vxrm">; +let isConstant = true in +def VLENB : RISCVReg<0, "vlenb">, + DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; + def VCSR : RegisterClass<"RISCV", [XLenVT], 32, (add VTYPE, VL, VLENB)> { let RegInfos = XLenRI; @@ -582,15 +578,3 @@ foreach m = LMULList.m in { // Special registers def FFLAGS : RISCVReg<0, "fflags">; def FRM : RISCVReg<0, "frm">; - -// Any type register. Used for .insn directives when we don't know what the -// register types could be. -// NOTE: The alignment and size are bogus values. The Size needs to be non-zero -// or tablegen will use "untyped" to determine the size which will assert. -let isAllocatable = 0 in -def AnyReg : RegisterClass<"RISCV", [untyped], 32, - (add (sequence "X%u", 0, 31), - (sequence "F%u_D", 0, 31), - (sequence "V%u", 0, 31))> { - let Size = 32; -} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index a6a216392de74..2f6b747140e59 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -25,7 +25,7 @@ static cl::opt RVVRegisterWidthLMUL( cl::desc( "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), - cl::init(1), cl::Hidden); + cl::init(2), cl::Hidden); static cl::opt SLPMaxVF( "riscv-v-slp-max-vf", diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp index 55be64ad7da01..45502a577e4e2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp @@ -18,21 +18,18 @@ using namespace llvm; -WebAssemblyDebugValueManager::WebAssemblyDebugValueManager( - MachineInstr *Instr) { +WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(MachineInstr *Def) { // This code differs from MachineInstr::collectDebugValues in that it scans // the whole BB, not just contiguous DBG_VALUEs. 
- if (!Instr->getOperand(0).isReg()) + if (!Def->getOperand(0).isReg()) return; - CurrentReg = Instr->getOperand(0).getReg(); + CurrentReg = Def->getOperand(0).getReg(); - MachineBasicBlock::iterator DI = *Instr; - ++DI; - for (MachineBasicBlock::iterator DE = Instr->getParent()->end(); DI != DE; - ++DI) { - if (DI->isDebugValue() && - DI->hasDebugOperandForReg(Instr->getOperand(0).getReg())) - DbgValues.push_back(&*DI); + for (MachineBasicBlock::iterator MI = std::next(Def->getIterator()), + ME = Def->getParent()->end(); + MI != ME; ++MI) { + if (MI->isDebugValue() && MI->hasDebugOperandForReg(CurrentReg)) + DbgValues.push_back(&*MI); } } @@ -42,15 +39,8 @@ void WebAssemblyDebugValueManager::move(MachineInstr *Insert) { MBB->splice(Insert, DBI->getParent(), DBI); } -void WebAssemblyDebugValueManager::updateReg(unsigned Reg) { - for (auto *DBI : DbgValues) - for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg)) - MO.setReg(Reg); - CurrentReg = Reg; -} - void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, - unsigned NewReg) { + Register NewReg) { MachineBasicBlock *MBB = Insert->getParent(); MachineFunction *MF = MBB->getParent(); for (MachineInstr *DBI : reverse(DbgValues)) { @@ -61,6 +51,13 @@ void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, } } +void WebAssemblyDebugValueManager::updateReg(Register Reg) { + for (auto *DBI : DbgValues) + for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg)) + MO.setReg(Reg); + CurrentReg = Reg; +} + void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) { for (auto *DBI : DbgValues) { auto IndexType = DBI->isIndirectDebugValue() diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h index c2dd569093044..4c63af21406e1 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h @@ -9,6 +9,9 @@ /// \file /// This file contains the declaration of the WebAssembly-specific /// manager for DebugValues associated with the specific MachineInstr. +/// This pass currently does not handle DBG_VALUE_LISTs; they are assumed to +/// have been set to undef in NullifyDebugValueLists pass. +/// TODO Handle DBG_VALUE_LIST /// //===----------------------------------------------------------------------===// @@ -16,21 +19,25 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Register.h" namespace llvm { class MachineInstr; class WebAssemblyDebugValueManager { - SmallVector DbgValues; - unsigned CurrentReg; + SmallVector DbgValues; + Register CurrentReg; public: - WebAssemblyDebugValueManager(MachineInstr *Instr); + WebAssemblyDebugValueManager(MachineInstr *Def); void move(MachineInstr *Insert); - void updateReg(unsigned Reg); - void clone(MachineInstr *Insert, unsigned NewReg); + void clone(MachineInstr *Insert, Register NewReg); + // Update the register for Def and DBG_VALUEs. + void updateReg(Register Reg); + // Replace the current register in DBG_VALUEs with the given LocalId target + // index. 
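A usage sketch of the interface declared here (Def, InsertPt, and NewReg are placeholder names, not code from the patch): when a defining instruction is cloned or sunk, the manager keeps the DBG_VALUEs that refer to its register in step.

    #include "WebAssemblyDebugValueManager.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/Register.h"
    using namespace llvm;

    // Cloning a def: duplicate its DBG_VALUEs for the copy, retargeted to NewReg.
    static void cloneDefWithDebugValues(MachineInstr *Def, MachineInstr *InsertPt,
                                        Register NewReg) {
      WebAssemblyDebugValueManager DVM(Def); // collects DBG_VALUEs of Def's reg
      DVM.clone(InsertPt, NewReg);
    }

    // Sinking a def that was also renamed: move the DBG_VALUEs, then update the
    // register they refer to.
    static void sinkDefWithDebugValues(MachineInstr *Def, MachineInstr *InsertPt,
                                       Register NewReg) {
      WebAssemblyDebugValueManager DVM(Def);
      DVM.move(InsertPt);
      DVM.updateReg(NewReg);
    }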
void replaceWithLocal(unsigned LocalId); }; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 6a123f8f4030f..ca9a5ef9dda1c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -73,7 +73,7 @@ defm RET_CALL : "return_call \t$callee", "return_call\t$callee", 0x12>, Requires<[HasTailCall]>; -let isReturn = 1 in +let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in defm RET_CALL_INDIRECT : I<(outs), (ins TypeIndex:$type, table32_op:$table, variable_ops), (outs), (ins TypeIndex:$type, table32_op:$table), [], diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 5e90a94819b6b..dfac24935e244 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1039,6 +1039,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { break; assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); + assert(N->getValueType(0).getVectorElementType() != MVT::i16 && + "We can't replace VSELECT with BLENDV in vXi16!"); SDValue Blendv = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1), N->getOperand(2)); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a491ba84bf705..b9ccb5b2c48dc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24167,6 +24167,10 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, return SDValue(); } + // Quit if not convertable to legal scalar or 128/256-bit vector. + if (!llvm::has_single_bit(VT.getSizeInBits())) + return SDValue(); + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode"); X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE); @@ -24188,36 +24192,42 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, DAG.getConstant(0, DL, IntVT)); } - // Quit if not splittable to 128/256-bit vector. - if (!llvm::has_single_bit(VT.getSizeInBits())) + // Without PTEST, a masked v2i64 or-reduction is not faster than + // scalarization. + bool UseKORTEST = Subtarget.useAVX512Regs(); + bool UsePTEST = Subtarget.hasSSE41(); + if (!UsePTEST && !Mask.isAllOnes() && VT.getScalarSizeInBits() > 32) return SDValue(); - // Split down to 128/256-bit vector. - unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; + // Split down to 128/256/512-bit vector. + unsigned TestSize = UseKORTEST ? 512 : (Subtarget.hasAVX() ? 256 : 128); while (VT.getSizeInBits() > TestSize) { auto Split = DAG.SplitVector(V, DL); VT = Split.first.getValueType(); V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); } - bool UsePTEST = Subtarget.hasSSE41(); + if (UseKORTEST && VT.is512BitVector()) { + V = DAG.getBitcast(MVT::v16i32, MaskBits(V)); + V = DAG.getSetCC(DL, MVT::v16i1, V, + getZeroVector(MVT::v16i32, Subtarget, DAG, DL), + ISD::SETNE); + return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V); + } + if (UsePTEST) { MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; V = DAG.getBitcast(TestVT, MaskBits(V)); return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V); } - // Without PTEST, a masked v2i64 or-reduction is not faster than - // scalarization. 
- if (!Mask.isAllOnes() && VT.getScalarSizeInBits() > 32) - return SDValue(); - V = DAG.getBitcast(MVT::v16i8, MaskBits(V)); V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V, getZeroVector(MVT::v16i8, Subtarget, DAG, DL)); + V = DAG.getNOT(DL, V, MVT::v16i8); V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V, - DAG.getConstant(0xFFFF, DL, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); } // Check whether an OR'd reduction tree is PTEST-able, or if we can fallback to @@ -44646,6 +44656,23 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, // Special case for (pre-legalization) vXi1 reductions. if (NumElts > 64 || !isPowerOf2_32(NumElts)) return SDValue(); + if (Match.getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast(Match.getOperand(2))->get(); + if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) || + (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) { + // If representable as a scalar integer: + // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y. + // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y. + EVT VecVT = Match.getOperand(0).getValueType(); + EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits()); + if (TLI.isTypeLegal(IntVT)) { + SDValue LHS = DAG.getFreeze(Match.getOperand(0)); + SDValue RHS = DAG.getFreeze(Match.getOperand(1)); + return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS), + DAG.getBitcast(IntVT, RHS), CC); + } + } + } if (TLI.isTypeLegal(MatchVT)) { // If this is a legal AVX512 predicate type then we can just bitcast. EVT MovmskVT = EVT::getIntegerVT(Ctx, NumElts); @@ -44657,20 +44684,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, ISD::CondCode CC = cast(Match.getOperand(2))->get(); if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) || (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) { - EVT VecVT = Match.getOperand(0).getValueType(); - - // If representable as a scalar integer: - // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y. - // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y. - EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits()); - if (TLI.isTypeLegal(IntVT)) { - SDValue LHS = DAG.getFreeze(Match.getOperand(0)); - SDValue RHS = DAG.getFreeze(Match.getOperand(1)); - return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS), - DAG.getBitcast(IntVT, RHS), CC); - } - - EVT VecSVT = VecVT.getScalarType(); + EVT VecSVT = Match.getOperand(0).getValueType().getScalarType(); if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) { NumElts *= VecSVT.getSizeInBits() / 8; EVT CmpVT = EVT::getVectorVT(Ctx, MVT::i8, NumElts); @@ -53956,7 +53970,7 @@ static bool isOrXorXorTree(SDValue X, bool Root = true) { /// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp /// expansion. template -static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, +static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG, EVT VecVT, EVT CmpVT, bool HasPT, F SToV) { SDValue Op0 = X.getOperand(0); SDValue Op1 = X.getOperand(1); @@ -53983,14 +53997,14 @@ static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, /// Try to map a 128-bit or larger integer comparison to vector instructions /// before type legalization splits it up into chunks. 
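A concrete source-level instance of the pattern this targets, given as an illustrative example rather than a test from the patch: a bulk equality check that reaches the DAG as a single wide integer compare, for instance after memcmp expansion.

    #include <cstring>

    struct Digest { unsigned long long Words[4]; }; // 256 bits

    // Expands to a wide integer equality; the combine lowers it to vector code
    // (XOR/OR plus PTEST, or PCMPEQ plus KORTEST with AVX-512) instead of a
    // chain of scalar compares and branches.
    bool sameDigest(const Digest &A, const Digest &B) {
      return std::memcmp(&A, &B, sizeof(Digest)) == 0;
    }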
-static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, +static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, + ISD::CondCode CC, + const SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget) { - ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); // We're looking for an oversized integer equality comparison. - SDValue X = SetCC->getOperand(0); - SDValue Y = SetCC->getOperand(1); EVT OpVT = X.getValueType(); unsigned OpSize = OpVT.getSizeInBits(); if (!OpVT.isScalarInteger() || OpSize < 128) @@ -54015,9 +54029,6 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, !IsOrXorXorTreeCCZero) return SDValue(); - EVT VT = SetCC->getValueType(0); - SDLoc DL(SetCC); - // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. @@ -54159,7 +54170,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); if (CC == ISD::SETNE || CC == ISD::SETEQ) { - if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget)) + if (SDValue V = combineVectorSizedSetCCEquality(VT, LHS, RHS, CC, DL, DAG, + Subtarget)) return V; if (VT == MVT::i1 && isNullConstant(RHS)) { @@ -54428,25 +54440,25 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) // iff pow2splat(c1). + // Use KnownBits to determine if only a single bit is non-zero + // in each element (pow2 or zero), and shift that bit to the msb. if (Src.getOpcode() == X86ISD::PCMPEQ && - Src.getOperand(0).getOpcode() == ISD::AND && ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) { - SDValue LHS = Src.getOperand(0).getOperand(0); - SDValue RHS = Src.getOperand(0).getOperand(1); - KnownBits KnownRHS = DAG.computeKnownBits(RHS); - if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) { + KnownBits KnownSrc = DAG.computeKnownBits(Src.getOperand(0)); + if (KnownSrc.countMaxPopulation() == 1) { SDLoc DL(N); MVT ShiftVT = SrcVT; + SDValue ShiftSrc = Src.getOperand(0); if (ShiftVT.getScalarType() == MVT::i8) { // vXi8 shifts - we only care about the signbit so can use PSLLW. ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - LHS = DAG.getBitcast(ShiftVT, LHS); + ShiftSrc = DAG.getBitcast(ShiftVT, ShiftSrc); } - unsigned ShiftAmt = KnownRHS.getConstant().countl_zero(); - LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS, - ShiftAmt, DAG); - LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT); - return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS); + unsigned ShiftAmt = KnownSrc.countMinLeadingZeros(); + ShiftSrc = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, + ShiftSrc, ShiftAmt, DAG); + ShiftSrc = DAG.getNOT(DL, DAG.getBitcast(SrcVT, ShiftSrc), SrcVT); + return DAG.getNode(X86ISD::MOVMSK, DL, VT, ShiftSrc); } } diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 42cc7c8f4585d..46d1412aa984c 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -125,12 +125,12 @@ class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo, let hasSideEffects = 0; } -// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg"(reversed +// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg"(reversed // encoding), with sched = WriteADC. 
class BinOpRR_RFF_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : BinOpRR_Rev; -// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg"(reversed +// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg"(reversed // encoding), without outlist dag. class BinOpRR_F_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : ITy opcode, string mnemonic, X86TypeInfo typeinfo, // has both a regclass and EFLAGS as a result, and has EFLAGS as input. class BinOpRM_RFF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> - : BinOpRM_ImplicitUse; // BinOpRI - Binary instructions with inputs "reg, imm". @@ -273,21 +273,21 @@ class BinOpMR_RMW opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpMR, + (implicit EFLAGS)]>, Sched<[WriteALURMW, // base, scale, index, offset, segment ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>; // reg -// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the +// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the // pattern use EFLAGS as operand and implicitly use EFLAGS. class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpMR, + (implicit EFLAGS)]>, Sched<[WriteADCRMW, // base, scale, index, offset, segment ReadDefault, ReadDefault, ReadDefault, @@ -321,7 +321,7 @@ class BinOpMI_RMW opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMI, + (implicit EFLAGS)]>, Sched<[WriteALURMW]>; // BinOpMI_RMW_FF - Binary instructions with inputs "[mem], imm", where the @@ -331,7 +331,7 @@ class BinOpMI_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMI, + (implicit EFLAGS)]>, Sched<[WriteADCRMW]>; // BinOpMI_F - Binary instructions with inputs "[mem], imm", where the pattern @@ -359,7 +359,7 @@ class BinOpMI8_RMW, + (implicit EFLAGS)]>, Sched<[WriteALURMW]>; // BinOpMI8_RMW_FF - Binary instructions with inputs "[mem], imm8", where the @@ -369,7 +369,7 @@ class BinOpMI8_RMW_FF, + (implicit EFLAGS)]>, Sched<[WriteADCRMW]>; // BinOpMI8_F - Binary instructions with inputs "[mem], imm8", where the pattern @@ -387,7 +387,7 @@ class BinOpAI opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands, X86FoldableSchedWrite sched = WriteALU> : ITy, + mnemonic, operands, []>, Sched<[sched]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; @@ -427,7 +427,7 @@ class UnaryOpR opcode, Format f, string mnemonic, X86TypeInfo info, class INCDECR : UnaryOpR<0xFE, f, mnemonic, info, - [(set info.RegClass:$dst, EFLAGS, + [(set info.RegClass:$dst, EFLAGS, (node info.RegClass:$src1, 1))]>; // INCDECM - Instructions like "inc [mem]". @@ -444,16 +444,16 @@ class INCDECR_ALT opcode, string mnemonic, X86TypeInfo info> } // MulOpR - Instructions like "mul reg". -class MulOpR opcode, Format f, string mnemonic, X86TypeInfo info, +class MulOpR opcode, Format f, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched, list pattern> - : ITy, + : ITy, Sched<[sched]>; // MulOpM - Instructions like "mul [mem]". -class MulOpM opcode, Format f, string mnemonic, X86TypeInfo info, +class MulOpM opcode, Format f, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched, list pattern> - : ITy, SchedLoadReg; // NegOpR - Instructions like "neg reg", with implicit EFLAGS. @@ -465,7 +465,7 @@ class NegOpR opcode, string mnemonic, X86TypeInfo info> // NotOpR - Instructions like "not reg". class NotOpR opcode, string mnemonic, X86TypeInfo info> : UnaryOpR; // NegOpM - Instructions like "neg [mem]", with implicit EFLAGS. 
@@ -496,16 +496,16 @@ class BinOpRM_C opcode, Format f, string mnemonic, X86TypeInfo info, mnemonic, "{$src2, $dst|$dst, $src2}", pattern>; // IMulOpRR - Instructions like "imul reg, reg, i8". -class IMulOpRR opcode, string mnemonic, X86TypeInfo info, +class IMulOpRR opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : BinOpRR_C, + (X86smul_flag info.RegClass:$src1, + info.RegClass:$src2))]>, Sched<[sched]>, TB; // IMulOpRM - Instructions like "imul reg, reg, [mem]". -class IMulOpRM opcode, string mnemonic, X86TypeInfo info, +class IMulOpRM opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : BinOpRM_C opcode, string mnemonic, X86TypeInfo info, Sched<[sched.Folded, sched.ReadAfterFold]>, TB; // IMulOpRRI8 - Instructions like "imul reg, reg, i8". -class IMulOpRRI8 opcode, string mnemonic, X86TypeInfo info, +class IMulOpRRI8 opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + (X86smul_flag info.RegClass:$src1, + info.Imm8NoSuOperator:$src2))]>, Sched<[sched]>{ let ImmT = Imm8; } // IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64". -class IMulOpRRI opcode, string mnemonic, X86TypeInfo info, +class IMulOpRRI opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + (X86smul_flag info.RegClass:$src1, + info.ImmNoSuOperator:$src2))]>, Sched<[sched]>{ let ImmT = info.ImmEncoding; } // IMulOpRMI8 - Instructions like "imul reg, [mem], i8". -class IMulOpRMI8 opcode, string mnemonic, X86TypeInfo info, +class IMulOpRMI8 opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + info.Imm8NoSuOperator:$src2))]>, Sched<[sched.Folded]>{ let ImmT = Imm8; } // IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64". -class IMulOpRMI opcode, string mnemonic, X86TypeInfo info, +class IMulOpRMI opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + info.ImmNoSuOperator:$src2))]>, Sched<[sched.Folded]>{ let ImmT = info.ImmEncoding; } @@ -639,7 +639,7 @@ let Predicates = [UseIncDec, In64BitMode] in { // SDNode results (i8, i32). // AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, +def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. diff --git a/llvm/lib/TextAPI/Platform.cpp b/llvm/lib/TextAPI/Platform.cpp index 673fcb764bf86..a432462c82e33 100644 --- a/llvm/lib/TextAPI/Platform.cpp +++ b/llvm/lib/TextAPI/Platform.cpp @@ -132,5 +132,12 @@ std::string getOSAndEnvironmentName(PlatformType Platform, llvm_unreachable("Unknown llvm::MachO::PlatformType enum"); } +VersionTuple mapToSupportedOSVersion(const Triple &Triple) { + const VersionTuple MinSupportedOS = Triple.getMinimumSupportedOSVersion(); + if (MinSupportedOS > Triple.getOSVersion()) + return MinSupportedOS; + return Triple.getOSVersion(); +} + } // end namespace MachO. } // end namespace llvm. diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp index a9355fabe2202..ade4c867fa49d 100644 --- a/llvm/lib/TextAPI/TextStubV5.cpp +++ b/llvm/lib/TextAPI/TextStubV5.cpp @@ -293,8 +293,10 @@ Expected getTargetsSection(const Object *Section) { if (!TargetOrErr) return make_error(getParseErrorMsg(TBDKey::Target)); TargetOrErr->MinDeployment = Version; - - IFTargets.push_back(*TargetOrErr); + // Convert to LLVM::Triple to accurately compute minOS + platform + arch + // pairing. 
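mapToSupportedOSVersion simply clamps the reported version upward, which can be restated with the following sketch (header locations vary between LLVM versions; std::max relies on VersionTuple's ordering):

    #include <algorithm>
    #include "llvm/Support/VersionTuple.h"
    #include "llvm/TargetParser/Triple.h"

    // Never report an OS version below the minimum the triple supports, so a
    // too-old deployment target is raised to the supported floor.
    llvm::VersionTuple effectiveOSVersion(const llvm::Triple &T) {
      return std::max(T.getMinimumSupportedOSVersion(), T.getOSVersion());
    }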
+ IFTargets.push_back( + MachO::Target(Triple(getTargetTripleName(*TargetOrErr)))); } return std::move(IFTargets); } diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 3a609eefcb10e..9ca63bead9bc7 100644 --- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -455,10 +455,12 @@ int llvm::libDriverMain(ArrayRef ArgsArr) { // For compatibility with MSVC, reverse member vector after de-duplication. std::reverse(Members.begin(), Members.end()); + bool Thin = Args.hasArg(OPT_llvmlibthin); if (Error E = writeArchive(OutputPath, Members, - /*WriteSymtab=*/true, object::Archive::K_GNU, - /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin))) { + /*WriteSymtab=*/true, + Thin ? object::Archive::K_GNU : object::Archive::K_COFF, + /*Deterministic*/ true, Thin)) { handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { llvm::errs() << OutputPath << ": " << EI.message() << "\n"; }); diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 473b41241b8a6..cf652836bef25 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -305,7 +306,7 @@ static bool tryToRecognizePopCount(Instruction &I) { Value *MulOp0; // Matching "(i * 0x01010101...) >> 24". if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) && - match(Op1, m_SpecificInt(MaskShift))) { + match(Op1, m_SpecificInt(MaskShift))) { Value *ShiftOp0; // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)". if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)), @@ -401,8 +402,8 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) { /// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids /// pessimistic codegen that has to account for setting errno and can enable /// vectorization. -static bool -foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) { +static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, + TargetLibraryInfo &TLI) { // Match a call to sqrt mathlib function. auto *Call = dyn_cast(&I); if (!Call) @@ -824,6 +825,58 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, return true; } +/// If C is a constant patterned array and all valid loaded results for given +/// alignment are same to a constant, return that constant. +static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) { + auto *LI = dyn_cast(&I); + if (!LI || LI->isVolatile()) + return false; + + // We can only fold the load if it is from a constant global with definitive + // initializer. Skip expensive logic if this is not the case. + auto *PtrOp = LI->getPointerOperand(); + auto *GV = dyn_cast(getUnderlyingObject(PtrOp)); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) + return false; + + Type *LoadTy = LI->getType(); + Constant *C = GV->getInitializer(); + + // Bail for large initializers in excess of 4K to avoid too many scans. 
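The idea behind foldPatternedLoads can be illustrated with a standalone sketch over raw bytes (assumed simplified types; the real code folds Constants and derives the step from the pointer stride and load alignment as described below): if every offset the pointer could legally have yields the same value, the load folds to that constant.

    #include <cstdint>
    #include <cstring>
    #include <optional>
    #include <vector>

    // Scan every offset that is a multiple of Step (max of pointer stride and
    // load alignment). If all candidate loads read the same value, return it.
    std::optional<uint64_t> foldPatternedLoad(const std::vector<uint8_t> &Init,
                                              unsigned LoadSize, unsigned Step) {
      if (LoadSize == 0 || LoadSize > 8 || Step == 0 || Init.size() < LoadSize)
        return std::nullopt;
      uint64_t First = 0;
      std::memcpy(&First, Init.data(), LoadSize);
      for (size_t Off = Step; Off + LoadSize <= Init.size(); Off += Step) {
        uint64_t V = 0;
        std::memcpy(&V, Init.data() + Off, LoadSize);
        if (V != First)
          return std::nullopt; // not a repeating pattern at this granularity
      }
      return First; // every legal load observes the same bytes
    }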
+ uint64_t GVSize = DL.getTypeAllocSize(C->getType()); + if (!GVSize || 4096 < GVSize) + return false; + + // Check whether pointer arrives back at Global Variable. + // If PtrOp is neither GlobalVariable nor GEP, it might not arrive back at + // GlobalVariable. + // TODO: implement GEP handling + unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType()); + // TODO: Determine stride based on GEPs. + APInt Stride(BW, 1); + APInt ConstOffset(BW, 0); + + // Any possible offset could be multiple of GEP stride. And any valid + // offset is multiple of load alignment, so checking only multiples of bigger + // one is sufficient to say results' equality. + if (auto LA = LI->getAlign(); + LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) + Stride = APInt(BW, LA.value()); + + Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL); + if (!Ca) + return false; + + unsigned E = GVSize - DL.getTypeStoreSize(LoadTy); + for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride) + if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL)) + return false; + + I.replaceAllUsesWith(Ca); + + return true; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. @@ -850,6 +903,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= tryToFPToSat(I, TTI); MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA); + MadeChange |= foldPatternedLoads(I, DL); // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index dd1a3b78a378c..3b1a174f5cc63 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -220,6 +221,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM, // pass in the loaded pointers. SmallVector Args; const DataLayout &DL = F->getParent()->getDataLayout(); + SmallVector DeadArgs; + while (!F->use_empty()) { CallBase &CB = cast(*F->user_back()); assert(CB.getCalledFunction() == F); @@ -255,6 +258,9 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM, Args.push_back(LI); ArgAttrVec.push_back(AttributeSet()); } + } else { + assert(ArgsToPromote.count(&*I) && I->use_empty()); + DeadArgs.emplace_back(AI->get()); } } @@ -297,6 +303,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM, CB.eraseFromParent(); } + RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs); + // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index fc8b12df67822..762e4ce0c3e79 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -14,9 +14,9 @@ // subsequently annotated with an attribute for later transformation. 
// // The transformations can be performed either directly on IR (regular LTO), or -// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO -// backend). Both types of LTO operate on a the same base graph representation, -// which uses CRTP to support either IR or Index formats. +// on a ThinLTO index (and later applied to the IR during the ThinLTO backend). +// Both types of LTO operate on a the same base graph representation, which +// uses CRTP to support either IR or Index formats. // //===----------------------------------------------------------------------===// @@ -28,9 +28,11 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -458,6 +460,56 @@ class ModuleCallsiteContextGraph const Module &Mod; }; +/// Represents a call in the summary index graph, which can either be an +/// allocation or an interior callsite node in an allocation's context. +/// Holds a pointer to the corresponding data structure in the index. +struct IndexCall : public PointerUnion { + IndexCall() : PointerUnion() {} + IndexCall(std::nullptr_t) : IndexCall() {} + IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {} + IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {} + + IndexCall *operator->() { return this; } + + void print(raw_ostream &OS) const { + if (auto *AI = dyn_cast()) + OS << *AI; + else { + auto *CI = dyn_cast(); + assert(CI); + OS << *CI; + } + } +}; + +/// CRTP derived class for graphs built from summary index (ThinLTO). +class IndexCallsiteContextGraph + : public CallsiteContextGraph { +public: + IndexCallsiteContextGraph( + ModuleSummaryIndex &Index, + function_ref + isPrevailing); + +private: + friend CallsiteContextGraph; + + uint64_t getStackId(uint64_t IdOrIndex) const; + bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func); + uint64_t getLastStackId(IndexCall &Call); + std::vector getStackIdsWithContextNodesForCall(IndexCall &Call); + std::string getLabel(const FunctionSummary *Func, const IndexCall &Call, + unsigned CloneNo) const; + + // Saves mapping from function summaries containing memprof records back to + // its VI, for use in checking and debugging. + std::map FSToVIMap; + + const ModuleSummaryIndex &Index; +}; + namespace { struct FieldSeparator { @@ -475,10 +527,24 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { return OS << FS.Sep; } +// Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc +// type we should actually use on the corresponding allocation. +// If we can't clone a node that has NotCold+Cold alloc type, we will fall +// back to using NotCold. So don't bother cloning to distinguish NotCold+Cold +// from NotCold. 
+AllocationType allocTypeToUse(uint8_t AllocTypes) { + assert(AllocTypes != (uint8_t)AllocationType::None); + if (AllocTypes == + ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) + return AllocationType::NotCold; + else + return (AllocationType)AllocTypes; +} + } // end anonymous namespace template -ContextNode * +typename CallsiteContextGraph::ContextNode * CallsiteContextGraph::getNodeForInst( const CallInfo &C) { ContextNode *Node = getNodeForAlloc(C); @@ -493,7 +559,7 @@ CallsiteContextGraph::getNodeForInst( } template -ContextNode * +typename CallsiteContextGraph::ContextNode * CallsiteContextGraph::getNodeForAlloc( const CallInfo &C) { auto AllocCallNode = AllocationCallToContextNodeMap.find(C); @@ -504,7 +570,7 @@ CallsiteContextGraph::getNodeForAlloc( } template -ContextNode * +typename CallsiteContextGraph::ContextNode * CallsiteContextGraph::getNodeForStackId( uint64_t StackId) { auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); @@ -540,7 +606,7 @@ void CallsiteContextGraph::ContextNode:: } template -ContextEdge * +typename CallsiteContextGraph::ContextEdge * CallsiteContextGraph::ContextNode:: findEdgeFromCallee(const ContextNode *Callee) { for (const auto &Edge : CalleeEdges) @@ -550,7 +616,7 @@ CallsiteContextGraph::ContextNode:: } template -ContextEdge * +typename CallsiteContextGraph::ContextEdge * CallsiteContextGraph::ContextNode:: findEdgeFromCaller(const ContextNode *Caller) { for (const auto &Edge : CallerEdges) @@ -599,7 +665,7 @@ uint8_t CallsiteContextGraph::computeAllocType( } template -ContextNode * +typename CallsiteContextGraph::ContextNode * CallsiteContextGraph::addAllocNode( CallInfo Call, const FuncTy *F) { assert(!getNodeForAlloc(Call)); @@ -966,13 +1032,13 @@ void CallsiteContextGraph::updateStackNodes() { // latter is so that we can specially handle calls that have identical stack // id sequences (either due to cloning or artificially because of the MIB // context pruning). - std::sort(Calls.begin(), Calls.end(), - [](const CallContextInfo &A, const CallContextInfo &B) { - auto &IdsA = std::get<1>(A); - auto &IdsB = std::get<1>(B); - return IdsA.size() > IdsB.size() || - (IdsA.size() == IdsB.size() && IdsA < IdsB); - }); + std::stable_sort(Calls.begin(), Calls.end(), + [](const CallContextInfo &A, const CallContextInfo &B) { + auto &IdsA = std::get<1>(A); + auto &IdsB = std::get<1>(B); + return IdsA.size() > IdsB.size() || + (IdsA.size() == IdsB.size() && IdsA < IdsB); + }); // Find the node for the last stack id, which should be the same // across all calls recorded for this id, and is the id for this @@ -1118,6 +1184,20 @@ uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { return CallsiteContext.back(); } +uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) { + assert(Call.is()); + CallStack::const_iterator> + CallsiteContext(Call.dyn_cast()); + // Need to convert index into stack id. + return Index.getStackIdAtIndex(CallsiteContext.back()); +} + +static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) { + if (!CloneNo) + return Base.str(); + return (Base + ".memprof." 
+ Twine(CloneNo)).str(); +} + std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, const Instruction *Call, unsigned CloneNo) const { @@ -1126,6 +1206,22 @@ std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, .str(); } +std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, + const IndexCall &Call, + unsigned CloneNo) const { + auto VI = FSToVIMap.find(Func); + assert(VI != FSToVIMap.end()); + if (Call.is()) + return (VI->second.name() + " -> alloc").str(); + else { + auto *Callsite = Call.dyn_cast(); + return (VI->second.name() + " -> " + + getMemProfFuncName(Callsite->Callee.name(), + Callsite->Clones[CloneNo])) + .str(); + } +} + std::vector ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( Instruction *Call) { @@ -1135,6 +1231,16 @@ ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( CallsiteContext); } +std::vector +IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { + assert(Call.is()); + CallStack::const_iterator> + CallsiteContext(Call.dyn_cast()); + return getStackIdsWithContextNodes::const_iterator>( + CallsiteContext); +} + template template std::vector @@ -1207,6 +1313,84 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); } +IndexCallsiteContextGraph::IndexCallsiteContextGraph( + ModuleSummaryIndex &Index, + function_ref + isPrevailing) + : Index(Index) { + for (auto &I : Index) { + auto VI = Index.getValueInfo(I); + for (auto &S : VI.getSummaryList()) { + // We should only add the prevailing nodes. Otherwise we may try to clone + // in a weak copy that won't be linked (and may be different than the + // prevailing version). + // We only keep the memprof summary on the prevailing copy now when + // building the combined index, as a space optimization, however don't + // rely on this optimization. The linker doesn't resolve local linkage + // values so don't check whether those are prevailing. + if (!GlobalValue::isLocalLinkage(S->linkage()) && + !isPrevailing(VI.getGUID(), S.get())) + continue; + auto *FS = dyn_cast(S.get()); + if (!FS) + continue; + std::vector CallsWithMetadata; + if (!FS->allocs().empty()) { + for (auto &AN : FS->mutableAllocs()) { + // This can happen because of recursion elimination handling that + // currently exists in ModuleSummaryAnalysis. Skip these for now. + // We still added them to the summary because we need to be able to + // correlate properly in applyImport in the backends. + if (AN.MIBs.empty()) + continue; + CallsWithMetadata.push_back({&AN}); + auto *AllocNode = addAllocNode({&AN}, FS); + // Pass an empty CallStack to the CallsiteContext (second) + // parameter, since for ThinLTO we already collapsed out the inlined + // stack ids on the allocation call during ModuleSummaryAnalysis. + CallStack::const_iterator> + EmptyContext; + // Now add all of the MIBs and their stack nodes. + for (auto &MIB : AN.MIBs) { + CallStack::const_iterator> + StackContext(&MIB); + addStackNodesForMIB::const_iterator>( + AllocNode, StackContext, EmptyContext, MIB.AllocType); + } + assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); + // Initialize version 0 on the summary alloc node to the current alloc + // type, unless it has both types in which case make it default, so + // that in the case where we aren't able to clone the original version + // always ends up with the default allocation behavior. 
+ AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); + } + } + // For callsite metadata, add to list for this function for later use. + if (!FS->callsites().empty()) + for (auto &SN : FS->mutableCallsites()) + CallsWithMetadata.push_back({&SN}); + + if (!CallsWithMetadata.empty()) + FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata}); + + if (!FS->allocs().empty() || !FS->callsites().empty()) + FSToVIMap[FS] = VI; + } + } + + if (DumpCCG) { + dbgs() << "CCG before updating call stack chains:\n"; + dbgs() << *this; + } + + if (ExportToDot) + exportToDot("prestackupdate"); + + updateStackNodes(); + + handleCallsitesWithMultipleTargets(); +} + template void CallsiteContextGraph::handleCallsitesWithMultipleTargets() { @@ -1251,6 +1435,12 @@ uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { return IdOrIndex; } +uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { + // In the Index case this is an index into the stack id list in the summary + // index, convert it to an Id. + return Index.getStackIdAtIndex(IdOrIndex); +} + bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, const Function *Func) { auto *CB = dyn_cast(Call); @@ -1264,6 +1454,23 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, return Alias && Alias->getAliasee() == Func; } +bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, + const FunctionSummary *Func) { + ValueInfo Callee = Call.dyn_cast()->Callee; + // If there is no summary list then this is a call to an externally defined + // symbol. + AliasSummary *Alias = + Callee.getSummaryList().empty() + ? nullptr + : dyn_cast(Callee.getSummaryList()[0].get()); + assert(FSToVIMap.count(Func)); + return Callee == FSToVIMap[Func] || + // If callee is an alias, check the aliasee, since only function + // summary base objects will contain the stack node summaries and thus + // get a context node. + (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]); +} + static std::string getAllocTypeString(uint8_t AllocTypes) { if (!AllocTypes) return "None"; @@ -1581,3 +1788,11 @@ PreservedAnalyses MemProfContextDisambiguation::run(Module &M, return PreservedAnalyses::all(); return PreservedAnalyses::none(); } + +void MemProfContextDisambiguation::run( + ModuleSummaryIndex &Index, + function_ref + isPrevailing) { + IndexCallsiteContextGraph CCG(Index, isPrevailing); + CCG.process(); +} diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 7a40ddee81798..ed1d5575db69a 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -286,9 +286,16 @@ uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const { void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { Module *M = F.getParent(); MDBuilder MDB(F.getContext()); - // Compute a GUID without considering the function's linkage type. This is - // fine since function name is the only key in the profile database. - uint64_t Guid = Function::getGUID(F.getName()); + // Since the GUID from probe desc and inline stack are computed seperately, we + // need to make sure their names are consistent, so here also use the name + // from debug info. 
+ StringRef FName = F.getName(); + if (auto *SP = F.getSubprogram()) { + FName = SP->getLinkageName(); + if (FName.empty()) + FName = SP->getName(); + } + uint64_t Guid = Function::getGUID(FName); // Assign an artificial debug line to a probe that doesn't come with a real // line. A probe not having a debug line will get an incomplete inline @@ -371,7 +378,7 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { // - FunctionHash. // - FunctionName auto Hash = getFunctionHash(); - auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F); + auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, FName); auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName); assert(NMD && "llvm.pseudo_probe_desc should be pre-created"); NMD->addOperand(MD); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index e380b47c735fe..8224de30d6986 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1391,9 +1391,20 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, IsExported = true; if (CSInfo.AllCallSitesDevirted) return; + + std::map CallBases; for (auto &&VCallSite : CSInfo.CallSites) { CallBase &CB = VCallSite.CB; + if (CallBases.find(&CB) != CallBases.end()) { + // When finding devirtualizable calls, it's possible to find the same + // vtable passed to multiple llvm.type.test or llvm.type.checked.load + // calls, which can cause duplicate call sites to be recorded in + // [Const]CallSites. If we've already found one of these + // call instances, just ignore it. It will be replaced later. + continue; + } + // Jump tables are only profitable if the retpoline mitigation is enabled. Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features"); if (!FSAttr.isValid() || @@ -1440,8 +1451,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, AttributeList::get(M.getContext(), Attrs.getFnAttrs(), Attrs.getRetAttrs(), NewArgAttrs)); - CB.replaceAllUsesWith(NewCS); - CB.eraseFromParent(); + CallBases[&CB] = NewCS; // This use is no longer unsafe. if (VCallSite.NumUnsafeUses) @@ -1451,6 +1461,11 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, // retpoline mitigation, which would mean that they are lowered to // llvm.type.test and therefore require an llvm.type.test resolution for the // type identifier. + + std::for_each(CallBases.begin(), CallBases.end(), [](auto &CBs) { + CBs.first->replaceAllUsesWith(CBs.second); + CBs.first->eraseFromParent(); + }); }; Apply(SlotInfo.CSInfo); for (auto &P : SlotInfo.ConstCSInfo) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2b61b58dbc36a..0708fb44b982b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1288,9 +1288,15 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II, Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Don't try to simplify calls without uses. It will not do anything useful, // but will result in the following folds being skipped. 
- if (!CI.use_empty()) - if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI))) + if (!CI.use_empty()) { + SmallVector<Value *> Args; + Args.reserve(CI.arg_size()); + for (Value *Op : CI.args()) + Args.push_back(Op); + if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args, + SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); + } if (Value *FreedOp = getFreedOperand(&CI, &TLI)) return visitFree(CI, FreedOp); @@ -1789,6 +1795,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty); return CallInst::Create(Bswap, { Op0 }); } + if (Instruction *BitOp = + matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true, + /*MatchBitReversals*/ true)) + return BitOp; } // Left or right might be masked. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 1f2441bc9fcf9..3d1dbdd6270d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3163,6 +3163,134 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { return nullptr; } +// Return true if we can safely remove the select instruction for the +// std::bit_ceil pattern. +static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0, + const APInt *Cond1, Value *CtlzOp, + unsigned BitWidth) { + // The challenge in recognizing std::bit_ceil(X) is that the operand is used + // for both the CTLZ proper and the select condition, each possibly with some + // operation like add and sub. + // + // Our aim is to make sure that -ctlz & (BitWidth - 1) == 0 even when the + // select instruction would select 1, which allows us to get rid of the select + // instruction. + // + // To see if we can do so, we do some symbolic execution with ConstantRange. + // Specifically, we compute the range of values that Cond0 could take when + // Cond == false. Then we successively transform the range until we obtain + // the range of values that CtlzOp could take. + // + // Conceptually, we follow the def-use chain backward from Cond0 while + // transforming the range for Cond0 until we meet the common ancestor of Cond0 + // and CtlzOp. Then we follow the def-use chain forward until we obtain the + // range for CtlzOp. That said, we only follow at most one ancestor from + // Cond0. Likewise, we only follow at most one ancestor from CtlzOp. + + ConstantRange CR = ConstantRange::makeExactICmpRegion( + CmpInst::getInversePredicate(Pred), *Cond1); + + // Match the operation that's used to compute CtlzOp from CommonAncestor. If + // CtlzOp == CommonAncestor, return true as no operation is needed. If a + // match is found, execute the operation on CR, update CR, and return true. + // Otherwise, return false. + auto MatchForward = [&](Value *CommonAncestor) { + const APInt *C = nullptr; + if (CtlzOp == CommonAncestor) + return true; + if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) { + CR = CR.add(*C); + return true; + } + if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) { + CR = ConstantRange(*C).sub(CR); + return true; + } + if (match(CtlzOp, m_Not(m_Specific(CommonAncestor)))) { + CR = CR.binaryNot(); + return true; + } + return false; + }; + + const APInt *C = nullptr; + Value *CommonAncestor; + if (MatchForward(Cond0)) { + // Cond0 is either CtlzOp or CtlzOp's parent. CR has been updated.
+ } else if (match(Cond0, m_Add(m_Value(CommonAncestor), m_APInt(C)))) { + CR = CR.sub(*C); + if (!MatchForward(CommonAncestor)) + return false; + // Cond0's parent is either CtlzOp or CtlzOp's parent. CR has been updated. + } else { + return false; + } + + // Return true if all the values in the range are either 0 or negative (if + // treated as signed). We do so by evaluating: + // + // CR - 1 u>= (1 << (BitWidth - 1)) - 1. + APInt IntMax = APInt::getSignMask(BitWidth) - 1; + CR = CR.sub(APInt(BitWidth, 1)); + return CR.icmp(ICmpInst::ICMP_UGE, IntMax); +} + +// Transform the std::bit_ceil(X) pattern like: +// +// %dec = add i32 %x, -1 +// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) +// %sub = sub i32 32, %ctlz +// %shl = shl i32 1, %sub +// %ugt = icmp ugt i32 %x, 1 +// %sel = select i1 %ugt, i32 %shl, i32 1 +// +// into: +// +// %dec = add i32 %x, -1 +// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) +// %neg = sub i32 0, %ctlz +// %masked = and i32 %neg, 31 +// %shl = shl i32 1, %masked +// +// Note that the select is optimized away while the shift count is masked with +// 31. We handle some variations of the input operand like std::bit_ceil(X + +// 1). +static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) { + Type *SelType = SI.getType(); + unsigned BitWidth = SelType->getScalarSizeInBits(); + + Value *FalseVal = SI.getFalseValue(); + Value *TrueVal = SI.getTrueValue(); + ICmpInst::Predicate Pred; + const APInt *Cond1; + Value *Cond0, *Ctlz, *CtlzOp; + if (!match(SI.getCondition(), m_ICmp(Pred, m_Value(Cond0), m_APInt(Cond1)))) + return nullptr; + + if (match(TrueVal, m_One())) { + std::swap(FalseVal, TrueVal); + Pred = CmpInst::getInversePredicate(Pred); + } + + if (!match(FalseVal, m_One()) || + !match(TrueVal, + m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth), + m_Value(Ctlz)))))) || + !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) || + !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth)) + return nullptr; + + // Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a + // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth + // is an integer constant. Masking with BitWidth-1 comes free on some + // hardware as part of the shift instruction.
+ Value *Neg = Builder.CreateNeg(Ctlz); + Value *Masked = + Builder.CreateAnd(Neg, ConstantInt::get(SelType, BitWidth - 1)); + return BinaryOperator::Create(Instruction::Shl, ConstantInt::get(SelType, 1), + Masked); +} + Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -3590,5 +3718,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (sinkNotIntoOtherHandOfLogicalOp(SI)) return &SI; + if (Instruction *I = foldBitCeil(SI, Builder)) + return I; + return nullptr; } diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index 8b1d39ad412fa..04ffbf6636e1a 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -56,7 +56,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE) { - uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType()); + TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType()); LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); @@ -72,7 +72,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, ConstantInt *SizeCI = dyn_cast(Size); Type *IntTy = DL.getIntPtrType(Ptr->getType()); - Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); + Value *NeededSizeVal = IRB.CreateTypeSize(IntTy, NeededSize); auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index ca498d08422f7..f98cb67481154 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -964,7 +964,7 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { return false; // FIXME IRBuilder<> IRB(O.getInsn()); - if (isPowerOf2_64(O.TypeStoreSize) && + if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) && (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) && (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() || *O.Alignment >= O.TypeStoreSize / 8)) { @@ -980,7 +980,9 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { } else { IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], {IRB.CreatePointerCast(Addr, IntptrTy), - ConstantInt::get(IntptrTy, O.TypeStoreSize / 8)}); + IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, + O.TypeStoreSize), + ConstantInt::get(IntptrTy, 8))}); } untagPointerOperand(O.getInsn(), Addr); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 75cb9e0c0e63d..953ce72c1cec9 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1183,13 +1183,29 @@ struct MemorySanitizerVisitor : public InstVisitor { /// Fill memory range with the given origin value. 
void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr, TypeSize TS, Align Alignment) { - unsigned Size = TS.getFixedValue(); const DataLayout &DL = F.getParent()->getDataLayout(); const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy); unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); assert(IntptrAlignment >= kMinOriginAlignment); assert(IntptrSize >= kOriginSize); + // Note: The loop based formation works for fixed length vectors too, + // however we prefer to unroll and specialize alignment below. + if (TS.isScalable()) { + Value *Size = IRB.CreateTypeSize(IRB.getInt32Ty(), TS); + Value *RoundUp = IRB.CreateAdd(Size, IRB.getInt32(kOriginSize - 1)); + Value *End = IRB.CreateUDiv(RoundUp, IRB.getInt32(kOriginSize)); + auto [InsertPt, Index] = + SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint()); + IRB.SetInsertPoint(InsertPt); + + Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index); + IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment); + return; + } + + unsigned Size = TS.getFixedValue(); + unsigned Ofs = 0; Align CurrentAlignment = Alignment; if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) { @@ -1575,6 +1591,8 @@ struct MemorySanitizerVisitor : public InstVisitor { if (ArrayType *Array = dyn_cast(V->getType())) return collapseArrayShadow(Array, V, IRB); if (isa(V->getType())) { + if (isa(V->getType())) + return convertShadowToScalar(IRB.CreateOrReduce(V), IRB); unsigned BitWidth = V->getType()->getPrimitiveSizeInBits().getFixedValue(); return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth)); diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 43f3beb4b34b5..28e6794389538 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -910,60 +910,27 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, assert(isLoopCounter(IndVar, L, SE)); assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); const SCEVAddRecExpr *AR = cast(SE->getSCEV(IndVar)); - const SCEV *IVInit = AR->getStart(); assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); - // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter - // finds a valid pointer IV. - if (IndVar->getType()->isPointerTy()) { - const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; - const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); - assert(SE->isLoopInvariant(IVLimit, L) && - "Computed iteration count is not loop invariant!"); - return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), - ExitingBB->getTerminator()); - } else { - // In any other case, convert both IVInit and ExitCount to integers before - // comparing. This may result in SCEV expansion of pointers, but in practice - // SCEV will fold the pointer arithmetic away as such: - // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). - // - // Valid Cases: (1) both integers is most common; (2) both may be pointers - // for simple memset-style loops. - // - // IVInit integer and ExitCount pointer would only occur if a canonical IV - // were generated on top of case #2, which is not expected. - - // For unit stride, IVCount = Start + ExitCount with 2's complement - // overflow. - - // For integer IVs, truncate the IV before computing IVInit + BECount, - // unless we know apriori that the limit must be a constant when evaluated - // in the bitwidth of the IV. 
We prefer (potentially) keeping a truncate - // of the IV in the loop over a (potentially) expensive expansion of the - // widened exit count add(zext(add)) expression. - if (SE->getTypeSizeInBits(IVInit->getType()) - > SE->getTypeSizeInBits(ExitCount->getType())) { - if (isa(IVInit) && isa(ExitCount)) - ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); - else - IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); - } - - const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); - - if (UsePostInc) - IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); - - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(IVLimit, L) && - "Computed iteration count is not loop invariant!"); - // Ensure that we generate the same type as IndVar, or a smaller integer - // type. In the presence of null pointer values, we have an integer type - // SCEV expression (IVInit) for a pointer type IV value (IndVar). - return Rewriter.expandCodeFor(IVLimit, ExitCount->getType(), - ExitingBB->getTerminator()); + // For integer IVs, truncate the IV before computing the limit unless we + // know apriori that the limit must be a constant when evaluated in the + // bitwidth of the IV. We prefer (potentially) keeping a truncate of the + // IV in the loop over a (potentially) expensive expansion of the widened + // exit count add(zext(add)) expression. + if (IndVar->getType()->isIntegerTy() && + SE->getTypeSizeInBits(AR->getType()) > + SE->getTypeSizeInBits(ExitCount->getType())) { + const SCEV *IVInit = AR->getStart(); + if (!isa(IVInit) || !isa(ExitCount)) + AR = cast(SE->getTruncateExpr(AR, ExitCount->getType())); } + + const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; + const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); + return Rewriter.expandCodeFor(IVLimit, ARBase->getType(), + ExitingBB->getTerminator()); } /// This method rewrites the exit condition of the loop to be a canonical != diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 82312de71c72c..fbdc436e0d37e 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6769,14 +6769,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, // iteration. The simplest case to consider is a candidate IV which is // narrower than the trip count (and thus original IV), but this can // also happen due to non-unit strides on the candidate IVs. - // TODO: This check should be replaceable with PostInc->hasNoSelfWrap(), - // but in practice we appear to be missing inference for cases we should - // be able to catch. 
- ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE)); - ConstantRange BECountCR = SE.getUnsignedRange(BECount); - unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits(); - unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType()); - if (NoOverflowBitWidth > ARBitWidth) + if (!AddRec->hasNoSelfWrap()) continue; const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE); diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index af8bc8126160e..7fb6a7415a6fe 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -157,7 +157,25 @@ int FunctionComparator::cmpAttrs(const AttributeList L, return 0; } -int FunctionComparator::cmpMetadata(const MDNode *L, const MDNode *R) const { +int FunctionComparator::cmpMetadata(const Metadata *L, + const Metadata *R) const { + // TODO: the following routine coerces the metadata contents into constants + // before comparison. + // It ignores any other cases, so such metadata nodes are considered + // equal even though this is not correct. + // We should compare the metadata nodes structurally to be fully correct here. + auto *CL = dyn_cast<ConstantAsMetadata>(L); + auto *CR = dyn_cast<ConstantAsMetadata>(R); + if (CL == CR) + return 0; + if (!CL) + return -1; + if (!CR) + return 1; + return cmpConstants(CL->getValue(), CR->getValue()); +} + +int FunctionComparator::cmpMDNode(const MDNode *L, const MDNode *R) const { if (L == R) return 0; if (!L) @@ -172,23 +190,9 @@ int FunctionComparator::cmpMetadata(const MDNode *L, const MDNode *R) const { // function semantically. if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) return Res; - for (size_t I = 0; I < L->getNumOperands(); ++I) { - // TODO: the following routine coerce the metadata contents into numbers - // before comparison. - // It ignores any other cases, so that the metadata nodes are considered - // equal even though this is not correct. - // We should structurally compare the metadata nodes to be perfect here.
- ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); - ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); - if (LLow == RLow) - continue; - if (!LLow) - return -1; - if (!RLow) - return 1; - if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) + for (size_t I = 0; I < L->getNumOperands(); ++I) + if (int Res = cmpMetadata(L->getOperand(I), R->getOperand(I))) return Res; - } return 0; } @@ -209,7 +213,7 @@ int FunctionComparator::cmpInstMetadata(Instruction const *L, auto const [KeyR, MR] = MDR[I]; if (int Res = cmpNumbers(KeyL, KeyR)) return Res; - if (int Res = cmpMetadata(ML, MR)) + if (int Res = cmpMDNode(ML, MR)) return Res; } return 0; } @@ -645,8 +649,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(CI->getTailCallKind(), cast<CallInst>(R)->getTailCallKind())) return Res; - return cmpMetadata(L->getMetadata(LLVMContext::MD_range), - R->getMetadata(LLVMContext::MD_range)); + return cmpMDNode(L->getMetadata(LLVMContext::MD_range), + R->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { ArrayRef<unsigned> LIndices = IVI->getIndices(); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 5c1fd6e9ae4a4..31deb08d45989 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2675,14 +2675,7 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, intersectAccessGroups(K, J)); break; case LLVMContext::MD_range: - - // If K does move, use most generic range. Otherwise keep the range of - // K. - if (DoesKMove) - // FIXME: If K does move, we should drop the range info and nonnull. - // Currently this function is used with DoesKMove in passes - // doing hoisting/sinking and the current behavior of using the - // most generic range is correct in those cases.
+ if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef)) K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD)); break; case LLVMContext::MD_fpmath: diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll index 44081f32d0af6..60b2e9d50dd59 100644 --- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll +++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -94,7 +94,7 @@ define void @test_01(i32 %a, i32 %b) { ; CHECK: %s3 = add i32 %is1, %phi5 ; CHECK-NEXT: --> {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2> ; CHECK: %s4 = add i32 %phi2, %is2 -; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> +; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> ; CHECK: %s5 = add i32 %is1, %is2 ; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> ; CHECK: %s6 = add i32 %is2, %is1 diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index e1acec162d3c8..d4d3a9e13e277 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1633,9 +1633,9 @@ define i32 @ptr_induction_ult_1(ptr %a, ptr %b) { ; CHECK-LABEL: 'ptr_induction_ult_1' ; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_1 ; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr.iv.next = getelementptr i32, ptr %ptr.iv, i64 1 -; CHECK-NEXT: --> {(4 + %a),+,4}<%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %a),+,4}<%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_ult_1 ; CHECK-NEXT: Loop %loop: backedge-taken count is 0 ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 0 @@ -1686,18 +1686,17 @@ exit: ret i32 0 } -; TODO: The pointer induction variable can be implied No Self Wrap. 
define void @gep_addrec_nw(ptr %a) { ; CHECK-LABEL: 'gep_addrec_nw' ; CHECK-NEXT: Classifying expressions for: @gep_addrec_nw ; CHECK-NEXT: %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,4}<%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%a,+,4}<%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ] ; CHECK-NEXT: --> {379,+,-1}<%for.body> U: [1,380) S: [1,380) Exits: 1 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %lsr.iv.next = add nsw i64 %lsr.iv, -1 ; CHECK-NEXT: --> {378,+,-1}<%for.body> U: [0,379) S: [0,379) Exits: 0 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 -; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @gep_addrec_nw ; CHECK-NEXT: Loop %for.body: backedge-taken count is 378 ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 378 diff --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll index 63d5632b50821..8914406f1db73 100644 --- a/llvm/test/CodeGen/AArch64/min-max.ll +++ b/llvm/test/CodeGen/AArch64/min-max.ll @@ -428,6 +428,36 @@ define i8 @umaxi8(i8 %a, i8 %b) { ret i8 %c } +define i8 @umaxi8_1(i8 %a) { +; CHECK-ISEL-LABEL: umaxi8_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: and w8, w0, #0xff +; CHECK-ISEL-NEXT: tst w0, #0xfe +; CHECK-ISEL-NEXT: csinc w0, w8, wzr, ne +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi8_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: and w8, w0, #0xff +; CHECK-ISEL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi8_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: and w8, w0, #0xff +; CHECK-GLOBAL-NEXT: cmp w8, #1 +; CHECK-GLOBAL-NEXT: csinc w0, w0, wzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi8_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: and w8, w0, #0xff +; CHECK-GLOBAL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i8 @llvm.umax.i8(i8 %a, i8 1) + ret i8 %c +} + declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone define i16 @umaxi16(i16 %a, i16 %b) { @@ -463,6 +493,36 @@ define i16 @umaxi16(i16 %a, i16 %b) { ret i16 %c } +define i16 @umaxi16_1(i16 %a) { +; CHECK-ISEL-LABEL: umaxi16_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: and w8, w0, #0xffff +; CHECK-ISEL-NEXT: tst w0, #0xfffe +; CHECK-ISEL-NEXT: csinc w0, w8, wzr, ne +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi16_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: and w8, w0, #0xffff +; CHECK-ISEL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi16_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: and w8, w0, #0xffff +; CHECK-GLOBAL-NEXT: cmp w8, #1 +; CHECK-GLOBAL-NEXT: csinc w0, w0, wzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi16_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: and w8, w0, #0xffff +; CHECK-GLOBAL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i16 @llvm.umax.i16(i16 %a, i16 1) + ret i16 %c +} + declare i32 
@llvm.umax.i32(i32 %a, i32 %b) readnone define i32 @umaxi32(i32 %a, i32 %b) { @@ -491,6 +551,32 @@ define i32 @umaxi32(i32 %a, i32 %b) { ret i32 %c } +define i32 @umaxi32_1(i32 %a) { +; CHECK-ISEL-LABEL: umaxi32_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmp w0, #1 +; CHECK-ISEL-NEXT: csinc w0, w0, wzr, hi +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi32_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: umax w0, w0, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi32_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmp w0, #1 +; CHECK-GLOBAL-NEXT: csinc w0, w0, wzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi32_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: umax w0, w0, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i32 @llvm.umax.i32(i32 %a, i32 1) + ret i32 %c +} + declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone define i64 @umaxi64(i64 %a, i64 %b) { @@ -519,6 +605,32 @@ define i64 @umaxi64(i64 %a, i64 %b) { ret i64 %c } +define i64 @umaxi64_1(i64 %a) { +; CHECK-ISEL-LABEL: umaxi64_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmp x0, #1 +; CHECK-ISEL-NEXT: csinc x0, x0, xzr, hi +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi64_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: umax x0, x0, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi64_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmp x0, #1 +; CHECK-GLOBAL-NEXT: csinc x0, x0, xzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi64_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: umax x0, x0, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i64 @llvm.umax.i64(i64 %a, i64 1) + ret i64 %c +} + declare <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone define <8 x i8> @umax8i8(<8 x i8> %a, <8 x i8> %b) { diff --git a/llvm/test/CodeGen/ARM/vecreduce-minmax.ll b/llvm/test/CodeGen/ARM/vecreduce-minmax.ll new file mode 100644 index 0000000000000..c392e6ca6bfa6 --- /dev/null +++ b/llvm/test/CodeGen/ARM/vecreduce-minmax.ll @@ -0,0 +1,219 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=hard -mattr=+neon -verify-machineinstrs | FileCheck %s + +define i8 @test_umin_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_umin_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u8 d16, d0, d0 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i8 @test_smin_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_smin_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s8 d16, d0, d0 +; CHECK-NEXT: vpmin.s8 d16, d16, d16 +; CHECK-NEXT: vpmin.s8 d16, d16, d16 +; CHECK-NEXT: vmov.s8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i8 @test_umax_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_umax_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u8 d16, d0, d0 +; CHECK-NEXT: vpmax.u8 d16, d16, d16 +; CHECK-NEXT: vpmax.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i8 @test_smax_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_smax_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.s8 d16, d0, d0 +; CHECK-NEXT: vpmax.s8 d16, d16, d16 +; CHECK-NEXT: vpmax.s8 d16, d16, d16 +; CHECK-NEXT: 
vmov.s8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i16 @test_umin_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_umin_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u16 d16, d0, d0 +; CHECK-NEXT: vpmin.u16 d16, d16, d16 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i16 @test_smin_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_smin_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s16 d16, d0, d0 +; CHECK-NEXT: vpmin.s16 d16, d16, d16 +; CHECK-NEXT: vmov.s16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i16 @test_umax_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_umax_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u16 d16, d0, d0 +; CHECK-NEXT: vpmax.u16 d16, d16, d16 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i16 @test_smax_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_smax_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.s16 d16, d0, d0 +; CHECK-NEXT: vpmax.s16 d16, d16, d16 +; CHECK-NEXT: vmov.s16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i32 @test_umin_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_umin_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i32 @test_smin_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_smin_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i32 @test_umax_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_umax_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i32 @test_smax_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_smax_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.s32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i8 @test_umin_v16i8(<16 x i8> %x) { +; CHECK-LABEL: test_umin_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u8 d16, d0, d1 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %x) + ret i8 %z +} + +define i16 @test_smin_v8i16(<8 x i16> %x) { +; CHECK-LABEL: test_smin_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s16 d16, d0, d1 +; CHECK-NEXT: vpmin.s16 d16, d16, d16 +; CHECK-NEXT: vpmin.s16 d16, d16, d16 +; CHECK-NEXT: vmov.s16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %x) + ret i16 %z +} + +define i32 @test_umax_v4i32(<4 x i32> %x) { +; CHECK-LABEL: test_umax_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u32 d16, d0, d1 +; CHECK-NEXT: vpmax.u32 d16, d16, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: 
bx lr +entry: + %z = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %x) + ret i32 %z +} + +define i8 @test_umin_v32i8(<32 x i8> %x) { +; CHECK-LABEL: test_umin_v32i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q8, q0, q1 +; CHECK-NEXT: vpmin.u8 d16, d16, d17 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %x) + ret i8 %z +} + +declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) + +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) diff --git a/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll b/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll new file mode 100644 index 0000000000000..b02bdc3b57242 --- /dev/null +++ b/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare < %s | FileCheck %s +; REQUIRES: aarch64-registered-target + +; Check that we don't give up if unable to sink the first argument. + +target triple = "aarch64-linux" + +declare void @f(ptr, ptr) cold + +define void @g(i1 %c1, ptr %p, i32 %i) { +; CHECK-LABEL: @g( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A0:%.*]] = getelementptr ptr, ptr [[P:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P]], i64 32 +; CHECK-NEXT: call void @f(ptr [[A0]], ptr [[SUNKADDR]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %a0 = getelementptr ptr, ptr %p, i32 %i + %a1 = getelementptr ptr, ptr %p, i32 4 + br i1 %c1, label %if.then, label %exit + +if.then: + call void @f(ptr %a0, ptr %a1) + br label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/Generic/addr-use-count.ll b/llvm/test/CodeGen/Generic/addr-use-count.ll new file mode 100644 index 0000000000000..a3b110bf60896 --- /dev/null +++ b/llvm/test/CodeGen/Generic/addr-use-count.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare < %s | FileCheck %s +; REQUIRES: aarch64-registered-target + +; Test that `%addr` is sunk, even though the number of memory uses to scan exceeds the limit. + +target triple = "aarch64-linux" + +declare void @g(...) 
+ +define void @f(ptr %p) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: [[T0:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T11:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T13:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T14:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T16:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T17:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T18:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: [[T19:%.*]] = load i32, ptr [[SUNKADDR1]], align 4 +; CHECK-NEXT: call void @g(i32 [[T0]], i32 [[T1]], i32 [[T2]], i32 [[T3]], i32 [[T4]], i32 [[T5]], i32 [[T6]], i32 [[T7]], i32 [[T8]], i32 [[T9]], i32 [[T10]], i32 [[T11]], i32 [[T12]], i32 [[T13]], i32 [[T14]], i32 [[T15]], i32 [[T16]], i32 [[T17]], i32 [[T18]], i32 [[T19]]) +; CHECK-NEXT: ret void +; +entry: + %addr = getelementptr i8, ptr %p, i32 4 + br label %exit + +exit: + %t0 = load i32, ptr %addr + %t1 = load i32, ptr %addr + %t2 = load i32, ptr %addr + %t3 = load i32, ptr %addr + %t4 = load i32, ptr %addr + %t5 = load i32, ptr %addr + %t6 = load i32, ptr %addr + %t7 = load i32, ptr %addr + %t8 = load i32, ptr %addr + %t9 = load i32, ptr %addr + %t10 = load i32, ptr %addr + %t11 = load i32, ptr %addr + %t12 = load i32, ptr %addr + %t13 = load i32, ptr %addr + %t14 = load i32, ptr %addr + %t15 = load i32, ptr %addr + %t16 = load i32, ptr %addr + %t17 = load i32, ptr %addr + %t18 = load i32, ptr %addr + %addr.1 = getelementptr i8, ptr %addr, i32 4 + %t19 = load i32, ptr %addr.1 + + call void @g(i32 %t0, i32 %t1, i32 %t2, i32 %t3, i32 %t4, i32 %t5, i32 %t6, i32 %t7, + i32 %t8, i32 %t9, i32 %t10, i32 %t11, i32 %t12, i32 %t13, i32 %t14, i32 %t15, i32 %t16, + i32 %t17, i32 %t18, i32 %t19) + ret void +} diff --git a/llvm/test/CodeGen/Mips/ehframe-indirect.ll b/llvm/test/CodeGen/Mips/ehframe-indirect.ll index 59f358316ddfd..b3f4b48329d7b 100644 --- a/llvm/test/CodeGen/Mips/ehframe-indirect.ll +++ b/llvm/test/CodeGen/Mips/ehframe-indirect.ll @@ -1,13 +1,9 @@ ; RUN: llc -mtriple=mipsel-linux-gnu < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-O32,O32 %s -; RUN: llc -mtriple=mipsel-linux-android < %s -asm-verbose -relocation-model=pic | \ -; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-O32,O32 %s ; RUN: llc -mtriple=mips64el-linux-gnu -target-abi=n32 < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N32 %s ; RUN: llc -mtriple=mips64el-linux-gnu < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck 
-check-prefixes=ALL,LINUX,LINUX-NEW,N64 %s -; RUN: llc -mtriple=mips64el-linux-android < %s -asm-verbose -relocation-model=pic | \ -; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N64 %s ; RUN: llc -mtriple=mips64el-linux-gnu < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N64 %s ; RUN: llc -mtriple=mips-unknown-freebsd11.0 < %s -asm-verbose -relocation-model=pic | \ diff --git a/llvm/test/CodeGen/Mips/emutls_generic.ll b/llvm/test/CodeGen/Mips/emutls_generic.ll deleted file mode 100644 index 344a581d6b4b7..0000000000000 --- a/llvm/test/CodeGen/Mips/emutls_generic.ll +++ /dev/null @@ -1,75 +0,0 @@ -; RUN: llc < %s -emulated-tls -mtriple=mipsel-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_32 %s -; RUN: llc < %s -emulated-tls -mtriple=mips64el-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_64 %s - -; RUN: llc < %s -mtriple=mipsel-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_32 %s -; RUN: llc < %s -mtriple=mips64el-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_64 %s - -; Make sure that TLS symbols are emitted in expected order. - -@external_x = external thread_local global i32, align 8 -@external_y = thread_local global i8 7, align 2 -@internal_y = internal thread_local global i64 9, align 16 - -define ptr @get_external_x() { -entry: - ret ptr @external_x -} - -define ptr @get_external_y() { -entry: - ret ptr @external_y -} - -define ptr @get_internal_y() { -entry: - ret ptr @internal_y -} - -; MIPS_32-LABEL: get_external_y: -; MIPS_32-LABEL: get_internal_y: -; MIPS_32: lw {{.+}}(__emutls_v.internal_y -; MIPS_32: lw {{.+}}call16(__emutls_get_address -; MIPS_32-NOT: __emutls_t.external_x -; MIPS_32-NOT: __emutls_v.external_x: -; MIPS_32: .data -; MIPS_32: .p2align 2 -; MIPS_32-LABEL: __emutls_v.external_y: -; MIPS_32: .section .rodata, -; MIPS_32-LABEL: __emutls_t.external_y: -; MIPS_32-NEXT: .byte 7 -; MIPS_32: .data -; MIPS_32: .p2align 2 -; MIPS_32-LABEL: __emutls_v.internal_y: -; MIPS_32-NEXT: .4byte 8 -; MIPS_32-NEXT: .4byte 16 -; MIPS_32-NEXT: .4byte 0 -; MIPS_32-NEXT: .4byte __emutls_t.internal_y -; MIPS_32-LABEL: __emutls_t.internal_y: -; MIPS_32-NEXT: .8byte 9 - -; MIPS_64-LABEL: get_external_x: -; MIPS_64-LABEL: get_external_y: -; MIPS_64-LABEL: get_internal_y: -; MIPS_64: ld {{.+}}(__emutls_v.internal_y -; MIPS_64: ld {{.+}}call16(__emutls_get_address -; MIPS_64-NOT: __emutls_t.external_x -; MIPS_64-NOT: __emutls_v.external_x: -; MIPS_64-LABEL: __emutls_v.external_y: -; MIPS_64-NOT: __emutls_v.external_x: -; MIPS_64: .section .rodata, -; MIPS_64-LABEL: __emutls_t.external_y: -; MIPS_64-NEXT: .byte 7 -; MIPS_64: .data -; MIPS_64: .p2align 3 -; MIPS_64-LABEL: __emutls_v.internal_y: -; MIPS_64-NEXT: .8byte 8 -; MIPS_64-NEXT: .8byte 16 -; MIPS_64-NEXT: .8byte 0 -; MIPS_64-NEXT: .8byte __emutls_t.internal_y -; MIPS_64: .section .rodata, -; MIPS_64-LABEL: __emutls_t.internal_y: -; MIPS_64-NEXT: .8byte 9 diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll index 51344b474d29e..daed7c1c98f0b 100644 --- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll +++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll @@ -1,6 +1,7 @@ ; Verify functionality of NVPTXGenericToNVVM.cpp pass. 
; ; RUN: opt < %s -march nvptx64 -S -generic-to-nvvm | FileCheck %s +; RUN: opt < %s -march nvptx64 -S -passes='generic-to-nvvm' | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll new file mode 100644 index 0000000000000..532d17e087e5b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mroptr < %s | FileCheck %s +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mroptr -filetype=obj -o %t.o < %s +; RUN: llvm-objdump -t --symbol-description %t.o | FileCheck %s --check-prefix=OBJ + +; RUN: not llc -mtriple powerpc-ibm-aix-xcoff -mroptr -data-sections=false \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=DS_ERR + +; DS_ERR: -mroptr option must be used with -data-sections + +%union.U = type { %"struct.U::A" } +%"struct.U::A" = type { ptr } + +@_ZL1p = internal constant i32 ptrtoint (ptr @_ZL1p to i32), align 4 +; CHECK: .csect _ZL1p[RO],2 +; CHECK-NEXT: .lglobl _ZL1p[RO] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{8})}} l .text {{([[:xdigit:]]{8})}} (idx: [[#]]) _ZL1p[RO] +@q = thread_local constant ptr @_ZL1p, align 4 +; CHECK: .csect q[TL],2 +; CHECK-NEXT: .globl q[TL] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{8})}} g O .tdata {{([[:xdigit:]]{8})}} (idx: [[#]]) q[TL] +@u = local_unnamed_addr constant [1 x %union.U] [%union.U { %"struct.U::A" { ptr @_ZL1p } }], align 4 +; CHECK: .csect u[RO],2 +; CHECK-NEXT: .globl u[RO] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{8})}} g .text {{([[:xdigit:]]{8})}} (idx: [[#]]) u[RO] diff --git a/llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll b/llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll new file mode 100644 index 0000000000000..aff753661b0e1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mroptr < %s | FileCheck %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mroptr -filetype=obj -o %t.o < %s +; RUN: llvm-objdump -t --symbol-description %t.o | FileCheck %s --check-prefix=OBJ + +; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff -mroptr -data-sections=false \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=DS_ERR +; RUN: not llc -mtriple powerpc64le-unknown-linux-gnu -mroptr \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=OS_ERR + +; DS_ERR: -mroptr option must be used with -data-sections +; OS_ERR: -mroptr option is only supported on AIX + +%union.U = type { %"struct.U::A" } +%"struct.U::A" = type { ptr } + +@_ZL1p = internal constant i64 ptrtoint (ptr @_ZL1p to i64), align 8 +; CHECK: .csect _ZL1p[RO],3 +; CHECK-NEXT: .lglobl _ZL1p[RO] +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .vbyte 8, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{16})}} l .text {{([[:xdigit:]]{16})}} (idx: [[#]]) _ZL1p[RO] +@q = thread_local constant ptr @_ZL1p, align 8 +; CHECK: .csect q[TL],3 +; CHECK-NEXT: .globl q[TL] +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .vbyte 8, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{16})}} g O .tdata {{([[:xdigit:]]{16})}} (idx: [[#]]) q[TL] +@u = local_unnamed_addr constant [1 x %union.U] [%union.U { %"struct.U::A" { ptr @_ZL1p } }], align 8 +; CHECK: .csect u[RO],3 +; CHECK-NEXT: .globl u[RO] +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .vbyte 8, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{16})}} g .text {{([[:xdigit:]]{16})}} (idx: [[#]]) 
u[RO] diff --git a/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll b/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll index 172affa4a2661..201af2f949618 100644 --- a/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll +++ b/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll @@ -1,9 +1,22 @@ ; RUN: rm -rf %t && split-file %s %t -; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-ref.ll | FileCheck %s --check-prefixes=NOREF -; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-vnds.ll | FileCheck %s --check-prefixes=NOVNDS -; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/with-vnds.ll | FileCheck %s --check-prefixes=WITHVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false < %t/no-ref.ll | FileCheck %s --check-prefixes=NOREF +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --filetype=obj < %t/no-ref.ll -o %t/no-ref.o +; RUN: llvm-objdump %t/no-ref.o -r | FileCheck %s --check-prefix=NOREF-OBJ +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false < %t/no-vnds.ll | FileCheck %s --check-prefixes=NOVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --filetype=obj < %t/no-vnds.ll -o %t/no-vnds.o +; RUN: llvm-objdump %t/no-vnds.o -r | FileCheck %s --check-prefix=NOVNDS-OBJ + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false < %t/with-vnds.ll | FileCheck %s --check-prefixes=WITHVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --filetype=obj < %t/with-vnds.ll -o %t/with-vnds.o +; RUN: llvm-objdump %t/with-vnds.o -tr | FileCheck %s --check-prefix=WITHVNDS-OBJ ;--- no-ref.ll ; The absence of a __llvm_prf_cnts section should stop generating the .refs. @@ -27,6 +40,10 @@ entry: ; NOREF-NOT: .ref __llvm_prf_names ; NOREF-NOT: .ref __llvm_prf_vnds +; NOREF-OBJ-NOT: R_REF __llvm_prf_data +; NOREF-OBJ-NOT: R_REF __llvm_prf_names +; NOREF-OBJ-NOT: R_REF __llvm_prf_vnds + ;--- no-vnds.ll ; This is the most common case. When -fprofile-generate is used and there exists executable code, we generate the __llvm_prf_cnts, __llvm_prf_data, and __llvm_prf_names sections. ; @@ -56,6 +73,10 @@ entry: ; NOVNDS-NEXT: .ref __llvm_prf_names[RO] ; NOVNDS-NOT: .ref __llvm_prf_vnds +; NOVNDS-OBJ: 00000008 R_REF __llvm_prf_data +; NOVNDS-OBJ: 00000008 R_REF __llvm_prf_names +; NOVNDS-OBJ-NOT: R_REF __llvm_prf_vnds + ;--- with-vnds.ll ; When value profiling is needed, the PGO instrumentation generates variables in the __llvm_prf_vnds section, so we generate a .ref for them too. 
; @@ -80,3 +101,22 @@ entry: ; WITHVNDS-NEXT: .ref __llvm_prf_data[RW] ; WITHVNDS-NEXT: .ref __llvm_prf_names[RO] ; WITHVNDS-NEXT: .ref __llvm_prf_vnds[RW] + +; WITHVNDS-OBJ: SYMBOL TABLE: +; WITHVNDS-OBJ-NEXT: 00000000 df *DEBUG* 00000000 +; WITHVNDS-OBJ-NEXT: 00000000 l .text 00000008 .text +; WITHVNDS-OBJ-NEXT: 00000000 g F .text (csect: .text) 00000000 .main +; WITHVNDS-OBJ-NEXT: 00000008 l .text 00000006 __llvm_prf_names +; WITHVNDS-OBJ-NEXT: 00000010 l O .data 00000008 __llvm_prf_cnts +; WITHVNDS-OBJ-NEXT: 00000018 l O .data 00000008 __llvm_prf_data +; WITHVNDS-OBJ-NEXT: 00000020 l O .data 000000f0 __llvm_prf_vnds +; WITHVNDS-OBJ-NEXT: 00000110 g O .data 0000000c main +; WITHVNDS-OBJ-NEXT: 0000011c l .data 00000000 TOC + +; WITHVNDS-OBJ: RELOCATION RECORDS FOR [.data]: +; WITHVNDS-OBJ-NEXT: OFFSET TYPE VALUE +; WITHVNDS-OBJ-NEXT: 00000008 R_REF __llvm_prf_data +; WITHVNDS-OBJ-NEXT: 00000008 R_REF __llvm_prf_names +; WITHVNDS-OBJ-NEXT: 00000008 R_REF __llvm_prf_vnds +; WITHVNDS-OBJ-NEXT: 00000100 R_POS .main +; WITHVNDS-OBJ-NEXT: 00000104 R_POS TOC diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index 5349c82ef0f0f..29eb12f7f5429 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -530,3 +530,76 @@ define signext i32 @sraw(i64 %a, i32 zeroext %b) nounwind { %2 = ashr i32 %1, %b ret i32 %2 } + +define i64 @add_hi_and_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_and_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_and_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, -1 + ret i64 %2 +} + +define i64 @add_hi_zero_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_zero_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_zero_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add i64 %0, 4294967295 + ret i64 %2 +} + +define i64 @add_lo_negone(i64 %0) { +; RV64I-LABEL: add_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, -2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, -4294967297 + ret i64 %2 +} + +define i64 @add_hi_one_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_one_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 31 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_one_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, 8589934591 + ret i64 %2 +} diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index f1528e94c473c..dc4b50215ab0a 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -1263,10 +1263,9 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: bnez a0, .LBB7_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: 
Header=BB7_2 Depth=1 +; RV32I-NEXT: seqz a0, a4 +; RV32I-NEXT: sub a3, a5, a0 ; RV32I-NEXT: addi a2, a4, -1 -; RV32I-NEXT: sltu a0, a2, a4 -; RV32I-NEXT: add a0, a5, a0 -; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: j .LBB7_1 ; RV32I-NEXT: .LBB7_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 @@ -1327,10 +1326,9 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: bnez a0, .LBB7_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: seqz a0, a4 +; RV32IA-NEXT: sub a3, a5, a0 ; RV32IA-NEXT: addi a2, a4, -1 -; RV32IA-NEXT: sltu a0, a2, a4 -; RV32IA-NEXT: add a0, a5, a0 -; RV32IA-NEXT: addi a3, a0, -1 ; RV32IA-NEXT: j .LBB7_1 ; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 diff --git a/llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll b/llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll deleted file mode 100644 index e5bdb96fd0741..0000000000000 --- a/llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll +++ /dev/null @@ -1,5 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=RV64E %s - -; RV64E: LLVM ERROR: RV32E can't be enabled for an RV64 target diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 543c17f748a45..acad770b693d2 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -666,14 +666,13 @@ define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) { define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) { ; RV32-LABEL: uaddo_i64_decrement_alt: ; RV32: # %bb.0: -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: sltu a4, a3, a0 -; RV32-NEXT: add a4, a1, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: sw a3, 0(a2) +; RV32-NEXT: seqz a3, a0 +; RV32-NEXT: sub a3, a1, a3 +; RV32-NEXT: addi a4, a0, -1 +; RV32-NEXT: sw a4, 0(a2) ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: sw a4, 4(a2) +; RV32-NEXT: sw a3, 4(a2) ; RV32-NEXT: ret ; ; RV64-LABEL: uaddo_i64_decrement_alt: @@ -695,12 +694,11 @@ define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) { ; RV32: # %bb.0: ; RV32-NEXT: or a3, a0, a1 ; RV32-NEXT: snez a3, a3 -; RV32-NEXT: addi a4, a0, -1 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: sub a1, a1, a4 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a4, 0(a2) -; RV32-NEXT: sw a0, 4(a2) +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: sw a1, 4(a2) ; RV32-NEXT: mv a0, a3 ; RV32-NEXT: ret ; @@ -1222,22 +1220,21 @@ define i64 @foo2(ptr %p) { define void @PR41129(ptr %p64) { ; RV32-LABEL: PR41129: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a1, 4(a0) -; RV32-NEXT: lw a2, 0(a0) -; RV32-NEXT: or a3, a2, a1 +; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: or a3, a1, a2 ; RV32-NEXT: beqz a3, .LBB36_2 ; RV32-NEXT: # %bb.1: # %false -; RV32-NEXT: andi a2, a2, 7 +; RV32-NEXT: andi a1, a1, 7 ; RV32-NEXT: sw zero, 4(a0) -; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 0(a0) ; RV32-NEXT: ret ; RV32-NEXT: .LBB36_2: # %true -; RV32-NEXT: addi a3, a2, -1 -; RV32-NEXT: sltu a2, a3, a2 -; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: seqz a3, a1 +; RV32-NEXT: sub a2, a2, a3 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: sw a3, 0(a0) -; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a2, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: PR41129: diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll 
b/llvm/test/CodeGen/RISCV/rv32e.ll deleted file mode 100644 index 88379ab438725..0000000000000 --- a/llvm/test/CodeGen/RISCV/rv32e.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s - -; CHECK: LLVM ERROR: Codegen not yet implemented for RV32E - -define void @nothing() nounwind { - ret void -} diff --git a/llvm/test/CodeGen/RISCV/rve.ll b/llvm/test/CodeGen/RISCV/rve.ll new file mode 100644 index 0000000000000..29b9bab61f7ff --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rve.ll @@ -0,0 +1,8 @@ +; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s +; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: Codegen not yet implemented for RVE + +define void @nothing() nounwind { + ret void +} diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index 58469ca23bb54..6be6785fc1d0e 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -454,10 +454,9 @@ define i32 @sext_of_not_i32(i1 %x) { define i64 @sext_of_not_i64(i1 %x) { ; RV32I-LABEL: sext_of_not_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a1, a0, 1 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i64: @@ -541,10 +540,9 @@ define i64 @dec_of_zexted_cmp_i64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, a0, 7 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: seqz a1, a0 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: dec_of_zexted_cmp_i64: diff --git a/llvm/test/CodeGen/WebAssembly/tailcall.ll b/llvm/test/CodeGen/WebAssembly/tailcall.ll index 07cdea1ec9b0f..84bd142462e37 100644 --- a/llvm/test/CodeGen/WebAssembly/tailcall.ll +++ b/llvm/test/CodeGen/WebAssembly/tailcall.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s ; RUN: llc < %s --filetype=obj -mattr=+tail-call | obj2yaml | FileCheck --check-prefix=YAML %s ; Test that the tail calls lower correctly @@ -10,101 +11,177 @@ target triple = "wasm32-unknown-unknown" declare i1 @foo(i1) declare i1 @bar(i1) -; CHECK-LABEL: recursive_notail_nullary: -; CHECK: {{^}} call recursive_notail_nullary{{$}} -; CHECK-NEXT: return define void @recursive_notail_nullary() { +; CHECK-LABEL: recursive_notail_nullary: +; CHECK: .functype recursive_notail_nullary 
() -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call recursive_notail_nullary +; CHECK-NEXT: return notail call void @recursive_notail_nullary() ret void } -; CHECK-LABEL: recursive_musttail_nullary: -; CHECK: return_call recursive_musttail_nullary{{$}} define void @recursive_musttail_nullary() { +; CHECK-LABEL: recursive_musttail_nullary: +; CHECK: .functype recursive_musttail_nullary () -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call recursive_musttail_nullary musttail call void @recursive_musttail_nullary() ret void } - -; CHECK-LABEL: recursive_tail_nullary: -; SLOW: return_call recursive_tail_nullary{{$}} -; FAST: {{^}} call recursive_tail_nullary{{$}} -; FAST-NEXT: return{{$}} define void @recursive_tail_nullary() { +; SLOW-LABEL: recursive_tail_nullary: +; SLOW: .functype recursive_tail_nullary () -> () +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: return_call recursive_tail_nullary +; +; FAST-LABEL: recursive_tail_nullary: +; FAST: .functype recursive_tail_nullary () -> () +; FAST-NEXT: # %bb.0: +; FAST-NEXT: call recursive_tail_nullary +; FAST-NEXT: return tail call void @recursive_tail_nullary() ret void } -; CHECK-LABEL: recursive_notail: -; CHECK: call $push[[L:[0-9]+]]=, recursive_notail, $0, $1{{$}} -; CHECK-NEXT: return $pop[[L]]{{$}} define i32 @recursive_notail(i32 %x, i32 %y) { +; CHECK-LABEL: recursive_notail: +; CHECK: .functype recursive_notail (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, recursive_notail, $0, $1 +; CHECK-NEXT: return $pop0 %v = notail call i32 @recursive_notail(i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: recursive_musttail: -; CHECK: return_call recursive_musttail, $0, $1{{$}} define i32 @recursive_musttail(i32 %x, i32 %y) { +; CHECK-LABEL: recursive_musttail: +; CHECK: .functype recursive_musttail (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call recursive_musttail, $0, $1 %v = musttail call i32 @recursive_musttail(i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: recursive_tail: -; SLOW: return_call recursive_tail, $0, $1{{$}} -; FAST: call $push[[L:[0-9]+]]=, recursive_tail, $0, $1{{$}} -; FAST-NEXT: return $pop[[L]]{{$}} define i32 @recursive_tail(i32 %x, i32 %y) { +; SLOW-LABEL: recursive_tail: +; SLOW: .functype recursive_tail (i32, i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: return_call recursive_tail, $0, $1 +; +; FAST-LABEL: recursive_tail: +; FAST: .functype recursive_tail (i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: call $push0=, recursive_tail, $0, $1 +; FAST-NEXT: return $pop0 %v = tail call i32 @recursive_tail(i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: indirect_notail: -; CHECK: call_indirect $push[[L:[0-9]+]]=, $0, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop[[L]]{{$}} define i32 @indirect_notail(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: indirect_notail: +; CHECK: .functype indirect_notail (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call_indirect $push0=, $0, $1, $2, $0 # Invalid depth argument! 
+; CHECK-NEXT: return $pop0 %p = extractvalue %fn %f, 0 %v = notail call i32 %p(%fn %f, i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: indirect_musttail: -; CHECK: return_call_indirect , $0, $1, $2, $0{{$}} define i32 @indirect_musttail(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: indirect_musttail: +; CHECK: .functype indirect_musttail (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call_indirect , $0, $1, $2, $0 %p = extractvalue %fn %f, 0 %v = musttail call i32 %p(%fn %f, i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: indirect_tail: -; CHECK: return_call_indirect , $0, $1, $2, $0{{$}} define i32 @indirect_tail(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: indirect_tail: +; CHECK: .functype indirect_tail (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call_indirect , $0, $1, $2, $0 %p = extractvalue %fn %f, 0 %v = tail call i32 %p(%fn %f, i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: choice_notail: -; CHECK: call_indirect $push[[L:[0-9]+]]=, $0, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[L]]{{$}} define i1 @choice_notail(i1 %x) { +; SLOW-LABEL: choice_notail: +; SLOW: .functype choice_notail (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, foo +; SLOW-NEXT: i32.const $push2=, bar +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: i32.and $push1=, $0, $pop0 +; SLOW-NEXT: i32.select $push4=, $pop3, $pop2, $pop1 +; SLOW-NEXT: call_indirect $push5=, $0, $pop4 # Invalid depth argument! +; SLOW-NEXT: return $pop5 +; +; FAST-LABEL: choice_notail: +; FAST: .functype choice_notail (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push3=, foo +; FAST-NEXT: i32.const $push4=, bar +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push5=, $pop3, $pop4, $pop2 +; FAST-NEXT: call_indirect $push0=, $0, $pop5 # Invalid depth argument! 
+; FAST-NEXT: return $pop0 %p = select i1 %x, ptr @foo, ptr @bar %v = notail call i1 %p(i1 %x) ret i1 %v } -; CHECK-LABEL: choice_musttail: -; CHECK: return_call_indirect , $0, $pop{{[0-9]+}}{{$}} define i1 @choice_musttail(i1 %x) { +; SLOW-LABEL: choice_musttail: +; SLOW: .functype choice_musttail (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, foo +; SLOW-NEXT: i32.const $push2=, bar +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: i32.and $push1=, $0, $pop0 +; SLOW-NEXT: i32.select $push4=, $pop3, $pop2, $pop1 +; SLOW-NEXT: return_call_indirect , $0, $pop4 +; +; FAST-LABEL: choice_musttail: +; FAST: .functype choice_musttail (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push4=, foo +; FAST-NEXT: i32.const $push3=, bar +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push0=, $pop4, $pop3, $pop2 +; FAST-NEXT: return_call_indirect , $0, $pop0 %p = select i1 %x, ptr @foo, ptr @bar %v = musttail call i1 %p(i1 %x) ret i1 %v } -; CHECK-LABEL: choice_tail: -; SLOW: return_call_indirect , $0, $pop{{[0-9]+}}{{$}} -; FAST: call_indirect $push[[L:[0-9]+]]=, $0, $pop{{[0-9]+}}{{$}} -; FAST: return $pop[[L]]{{$}} define i1 @choice_tail(i1 %x) { +; SLOW-LABEL: choice_tail: +; SLOW: .functype choice_tail (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, foo +; SLOW-NEXT: i32.const $push2=, bar +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: i32.and $push1=, $0, $pop0 +; SLOW-NEXT: i32.select $push4=, $pop3, $pop2, $pop1 +; SLOW-NEXT: return_call_indirect , $0, $pop4 +; +; FAST-LABEL: choice_tail: +; FAST: .functype choice_tail (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push3=, foo +; FAST-NEXT: i32.const $push4=, bar +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push5=, $pop3, $pop4, $pop2 +; FAST-NEXT: call_indirect $push0=, $0, $pop5 # Invalid depth argument! +; FAST-NEXT: return $pop0 %p = select i1 %x, ptr @foo, ptr @bar %v = tail call i1 %p(i1 %x) ret i1 %v @@ -114,95 +191,200 @@ define i1 @choice_tail(i1 %x) { ; prototype than its caller, so the following tests can only be done with ; 'tail'. 
-; CHECK-LABEL: mismatched_prototypes: -; SLOW: return_call baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; FAST: call $push[[L:[0-9]+]]=, baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; FAST: return $pop[[L]]{{$}} declare i32 @baz(i32, i32, i32) define i32 @mismatched_prototypes() { +; SLOW-LABEL: mismatched_prototypes: +; SLOW: .functype mismatched_prototypes () -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push2=, 0 +; SLOW-NEXT: i32.const $push1=, 42 +; SLOW-NEXT: i32.const $push0=, 6 +; SLOW-NEXT: return_call baz, $pop2, $pop1, $pop0 +; +; FAST-LABEL: mismatched_prototypes: +; FAST: .functype mismatched_prototypes () -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push1=, 0 +; FAST-NEXT: i32.const $push2=, 42 +; FAST-NEXT: i32.const $push3=, 6 +; FAST-NEXT: call $push0=, baz, $pop1, $pop2, $pop3 +; FAST-NEXT: return $pop0 %v = tail call i32 @baz(i32 0, i32 42, i32 6) ret i32 %v } -; CHECK-LABEL: mismatched_return_void: -; CHECK: call $drop=, baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK: return{{$}} define void @mismatched_return_void() { +; SLOW-LABEL: mismatched_return_void: +; SLOW: .functype mismatched_return_void () -> () +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push2=, 0 +; SLOW-NEXT: i32.const $push1=, 42 +; SLOW-NEXT: i32.const $push0=, 6 +; SLOW-NEXT: call $drop=, baz, $pop2, $pop1, $pop0 +; SLOW-NEXT: return +; +; FAST-LABEL: mismatched_return_void: +; FAST: .functype mismatched_return_void () -> () +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push0=, 0 +; FAST-NEXT: i32.const $push1=, 42 +; FAST-NEXT: i32.const $push2=, 6 +; FAST-NEXT: call $drop=, baz, $pop0, $pop1, $pop2 +; FAST-NEXT: return %v = tail call i32 @baz(i32 0, i32 42, i32 6) ret void } -; CHECK-LABEL: mismatched_return_f32: -; CHECK: call $push[[L:[0-9]+]]=, baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK: f32.reinterpret_i32 $push[[L1:[0-9]+]]=, $pop[[L]]{{$}} -; CHECK: return $pop[[L1]]{{$}} define float @mismatched_return_f32() { +; SLOW-LABEL: mismatched_return_f32: +; SLOW: .functype mismatched_return_f32 () -> (f32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push2=, 0 +; SLOW-NEXT: i32.const $push1=, 42 +; SLOW-NEXT: i32.const $push0=, 6 +; SLOW-NEXT: call $push3=, baz, $pop2, $pop1, $pop0 +; SLOW-NEXT: f32.reinterpret_i32 $push4=, $pop3 +; SLOW-NEXT: return $pop4 +; +; FAST-LABEL: mismatched_return_f32: +; FAST: .functype mismatched_return_f32 () -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push2=, 0 +; FAST-NEXT: i32.const $push3=, 42 +; FAST-NEXT: i32.const $push4=, 6 +; FAST-NEXT: call $push1=, baz, $pop2, $pop3, $pop4 +; FAST-NEXT: f32.reinterpret_i32 $push0=, $pop1 +; FAST-NEXT: return $pop0 %v = tail call i32 @baz(i32 0, i32 42, i32 6) %u = bitcast i32 %v to float ret float %u } -; CHECK-LABEL: mismatched_indirect_void: -; CHECK: call_indirect $drop=, $0, $1, $2, $0{{$}} -; CHECK: return{{$}} define void @mismatched_indirect_void(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: mismatched_indirect_void: +; CHECK: .functype mismatched_indirect_void (i32, i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call_indirect $drop=, $0, $1, $2, $0 # Invalid depth argument! 
+; CHECK-NEXT: return %p = extractvalue %fn %f, 0 %v = tail call i32 %p(%fn %f, i32 %x, i32 %y) ret void } -; CHECK-LABEL: mismatched_indirect_f32: -; CHECK: call_indirect $push[[L:[0-9]+]]=, $0, $1, $2, $0{{$}} -; CHECK: f32.reinterpret_i32 $push[[L1:[0-9]+]]=, $pop[[L]]{{$}} -; CHECK: return $pop[[L1]]{{$}} define float @mismatched_indirect_f32(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: mismatched_indirect_f32: +; CHECK: .functype mismatched_indirect_f32 (i32, i32, i32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call_indirect $push0=, $0, $1, $2, $0 # Invalid depth argument! +; CHECK-NEXT: f32.reinterpret_i32 $push1=, $pop0 +; CHECK-NEXT: return $pop1 %p = extractvalue %fn %f, 0 %v = tail call i32 %p(%fn %f, i32 %x, i32 %y) %u = bitcast i32 %v to float ret float %u } -; CHECK-LABEL: mismatched_byval: -; CHECK: i32.store -; CHECK: return_call quux, $pop{{[0-9]+}}{{$}} declare i32 @quux(ptr byval(i32)) define i32 @mismatched_byval(ptr %x) { +; CHECK-LABEL: mismatched_byval: +; CHECK: .functype mismatched_byval (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push1=, __stack_pointer +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: i32.sub $push8=, $pop1, $pop2 +; CHECK-NEXT: local.tee $push7=, $1=, $pop8 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: i32.load $push0=, 0($0) +; CHECK-NEXT: i32.store 12($1), $pop0 +; CHECK-NEXT: i32.const $push3=, 16 +; CHECK-NEXT: i32.add $push4=, $1, $pop3 +; CHECK-NEXT: global.set __stack_pointer, $pop4 +; CHECK-NEXT: i32.const $push5=, 12 +; CHECK-NEXT: i32.add $push6=, $1, $pop5 +; CHECK-NEXT: return_call quux, $pop6 %v = tail call i32 @quux(ptr byval(i32) %x) ret i32 %v } -; CHECK-LABEL: varargs: -; CHECK: i32.store -; CHECK: call $0=, var, $1{{$}} -; CHECK: return $0{{$}} declare i32 @var(...) define i32 @varargs(i32 %x) { +; CHECK-LABEL: varargs: +; CHECK: .functype varargs (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push0=, __stack_pointer +; CHECK-NEXT: i32.const $push1=, 16 +; CHECK-NEXT: i32.sub $push5=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push4=, $1=, $pop5 +; CHECK-NEXT: global.set __stack_pointer, $pop4 +; CHECK-NEXT: i32.store 0($1), $0 +; CHECK-NEXT: call $0=, var, $1 +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: i32.add $push3=, $1, $pop2 +; CHECK-NEXT: global.set __stack_pointer, $pop3 +; CHECK-NEXT: return $0 %v = tail call i32 (...) 
@var(i32 %x) ret i32 %v } ; Type transformations inhibit tail calls, even when they are nops -; CHECK-LABEL: mismatched_return_zext: -; CHECK: call define i32 @mismatched_return_zext() { +; SLOW-LABEL: mismatched_return_zext: +; SLOW: .functype mismatched_return_zext () -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: call $push1=, foo, $pop0 +; SLOW-NEXT: i32.const $push3=, 1 +; SLOW-NEXT: i32.and $push2=, $pop1, $pop3 +; SLOW-NEXT: return $pop2 +; +; FAST-LABEL: mismatched_return_zext: +; FAST: .functype mismatched_return_zext () -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push2=, 1 +; FAST-NEXT: call $push1=, foo, $pop2 +; FAST-NEXT: i32.const $push3=, 1 +; FAST-NEXT: i32.and $push0=, $pop1, $pop3 +; FAST-NEXT: return $pop0 %v = tail call i1 @foo(i1 1) %u = zext i1 %v to i32 ret i32 %u } -; CHECK-LABEL: mismatched_return_sext: -; CHECK: call define i32 @mismatched_return_sext() { +; SLOW-LABEL: mismatched_return_sext: +; SLOW: .functype mismatched_return_sext () -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, 0 +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: call $push1=, foo, $pop0 +; SLOW-NEXT: i32.const $push5=, 1 +; SLOW-NEXT: i32.and $push2=, $pop1, $pop5 +; SLOW-NEXT: i32.sub $push4=, $pop3, $pop2 +; SLOW-NEXT: return $pop4 +; +; FAST-LABEL: mismatched_return_sext: +; FAST: .functype mismatched_return_sext () -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push4=, 1 +; FAST-NEXT: call $push3=, foo, $pop4 +; FAST-NEXT: i32.const $push0=, 31 +; FAST-NEXT: i32.shl $push1=, $pop3, $pop0 +; FAST-NEXT: i32.const $push5=, 31 +; FAST-NEXT: i32.shr_s $push2=, $pop1, $pop5 +; FAST-NEXT: return $pop2 %v = tail call i1 @foo(i1 1) %u = sext i1 %v to i32 ret i32 %u } -; CHECK-LABEL: mismatched_return_trunc: -; CHECK: call declare i32 @int() define i1 @mismatched_return_trunc() { +; CHECK-LABEL: mismatched_return_trunc: +; CHECK: .functype mismatched_return_trunc () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, int +; CHECK-NEXT: return $pop0 %v = tail call i32 @int() %u = trunc i32 %v to i1 ret i1 %u @@ -210,36 +392,158 @@ define i1 @mismatched_return_trunc() { ; Stack-allocated arguments inhibit tail calls -; CHECK-LABEL: stack_arg: -; CHECK: call define i32 @stack_arg(ptr %x) { +; SLOW-LABEL: stack_arg: +; SLOW: .functype stack_arg (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: global.get $push0=, __stack_pointer +; SLOW-NEXT: i32.const $push1=, 16 +; SLOW-NEXT: i32.sub $push7=, $pop0, $pop1 +; SLOW-NEXT: local.tee $push6=, $2=, $pop7 +; SLOW-NEXT: global.set __stack_pointer, $pop6 +; SLOW-NEXT: i32.const $push4=, 12 +; SLOW-NEXT: i32.add $push5=, $2, $pop4 +; SLOW-NEXT: call $1=, stack_arg, $pop5 +; SLOW-NEXT: i32.const $push2=, 16 +; SLOW-NEXT: i32.add $push3=, $2, $pop2 +; SLOW-NEXT: global.set __stack_pointer, $pop3 +; SLOW-NEXT: return $1 +; +; FAST-LABEL: stack_arg: +; FAST: .functype stack_arg (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: global.get $push1=, __stack_pointer +; FAST-NEXT: i32.const $push2=, 16 +; FAST-NEXT: i32.sub $push8=, $pop1, $pop2 +; FAST-NEXT: local.tee $push7=, $2=, $pop8 +; FAST-NEXT: global.set __stack_pointer, $pop7 +; FAST-NEXT: i32.const $push5=, 12 +; FAST-NEXT: i32.add $push6=, $2, $pop5 +; FAST-NEXT: local.copy $push0=, $pop6 +; FAST-NEXT: call $1=, stack_arg, $pop0 +; FAST-NEXT: i32.const $push3=, 16 +; FAST-NEXT: i32.add $push4=, $2, $pop3 +; FAST-NEXT: global.set __stack_pointer, $pop4 +; FAST-NEXT: return $1 %a = alloca i32 %v = tail call i32 
@stack_arg(ptr %a) ret i32 %v } -; CHECK-LABEL: stack_arg_gep: -; CHECK: call define i32 @stack_arg_gep(ptr %x) { +; SLOW-LABEL: stack_arg_gep: +; SLOW: .functype stack_arg_gep (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: global.get $push2=, __stack_pointer +; SLOW-NEXT: i32.const $push3=, 16 +; SLOW-NEXT: i32.sub $push9=, $pop2, $pop3 +; SLOW-NEXT: local.tee $push8=, $2=, $pop9 +; SLOW-NEXT: global.set __stack_pointer, $pop8 +; SLOW-NEXT: i32.const $push6=, 8 +; SLOW-NEXT: i32.add $push7=, $2, $pop6 +; SLOW-NEXT: i32.const $push0=, 4 +; SLOW-NEXT: i32.or $push1=, $pop7, $pop0 +; SLOW-NEXT: call $1=, stack_arg_gep, $pop1 +; SLOW-NEXT: i32.const $push4=, 16 +; SLOW-NEXT: i32.add $push5=, $2, $pop4 +; SLOW-NEXT: global.set __stack_pointer, $pop5 +; SLOW-NEXT: return $1 +; +; FAST-LABEL: stack_arg_gep: +; FAST: .functype stack_arg_gep (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: global.get $push3=, __stack_pointer +; FAST-NEXT: i32.const $push4=, 16 +; FAST-NEXT: i32.sub $push10=, $pop3, $pop4 +; FAST-NEXT: local.tee $push9=, $2=, $pop10 +; FAST-NEXT: global.set __stack_pointer, $pop9 +; FAST-NEXT: i32.const $push7=, 8 +; FAST-NEXT: i32.add $push8=, $2, $pop7 +; FAST-NEXT: local.copy $push0=, $pop8 +; FAST-NEXT: i32.const $push1=, 4 +; FAST-NEXT: i32.add $push2=, $pop0, $pop1 +; FAST-NEXT: call $1=, stack_arg_gep, $pop2 +; FAST-NEXT: i32.const $push5=, 16 +; FAST-NEXT: i32.add $push6=, $2, $pop5 +; FAST-NEXT: global.set __stack_pointer, $pop6 +; FAST-NEXT: return $1 %a = alloca { i32, i32 } %p = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 %v = tail call i32 @stack_arg_gep(ptr %p) ret i32 %v } -; CHECK-LABEL: stack_arg_cast: -; CHECK: global.get $push{{[0-9]+}}=, __stack_pointer -; CHECK: global.set __stack_pointer, $pop{{[0-9]+}} -; FAST: call ${{[0-9]+}}=, stack_arg_cast, $pop{{[0-9]+}} -; CHECK: global.set __stack_pointer, $pop{{[0-9]+}} -; SLOW: return_call stack_arg_cast, ${{[0-9]+}} define i32 @stack_arg_cast(i32 %x) { +; SLOW-LABEL: stack_arg_cast: +; SLOW: .functype stack_arg_cast (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: global.get $push0=, __stack_pointer +; SLOW-NEXT: i32.const $push1=, 256 +; SLOW-NEXT: i32.sub $push5=, $pop0, $pop1 +; SLOW-NEXT: local.tee $push4=, $1=, $pop5 +; SLOW-NEXT: global.set __stack_pointer, $pop4 +; SLOW-NEXT: i32.const $push2=, 256 +; SLOW-NEXT: i32.add $push3=, $1, $pop2 +; SLOW-NEXT: global.set __stack_pointer, $pop3 +; SLOW-NEXT: return_call stack_arg_cast, $1 +; +; FAST-LABEL: stack_arg_cast: +; FAST: .functype stack_arg_cast (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: global.get $push1=, __stack_pointer +; FAST-NEXT: i32.const $push2=, 256 +; FAST-NEXT: i32.sub $push6=, $pop1, $pop2 +; FAST-NEXT: local.tee $push5=, $2=, $pop6 +; FAST-NEXT: global.set __stack_pointer, $pop5 +; FAST-NEXT: local.copy $push0=, $2 +; FAST-NEXT: call $1=, stack_arg_cast, $pop0 +; FAST-NEXT: i32.const $push3=, 256 +; FAST-NEXT: i32.add $push4=, $2, $pop3 +; FAST-NEXT: global.set __stack_pointer, $pop4 +; FAST-NEXT: return $1 %a = alloca [64 x i32] %i = ptrtoint ptr %a to i32 %v = tail call i32 @stack_arg_cast(i32 %i) ret i32 %v } +; Checks that epilogues are inserted after return calls. 
+define i32 @direct_epilogue() { +; CHECK-LABEL: direct_epilogue: +; CHECK: .functype direct_epilogue () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push0=, __stack_pointer +; CHECK-NEXT: i32.const $push1=, 256 +; CHECK-NEXT: i32.sub $push5=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push4=, $0=, $pop5 +; CHECK-NEXT: global.set __stack_pointer, $pop4 +; CHECK-NEXT: i32.const $push2=, 256 +; CHECK-NEXT: i32.add $push3=, $0, $pop2 +; CHECK-NEXT: global.set __stack_pointer, $pop3 +; CHECK-NEXT: return_call direct_epilogue + %a = alloca [64 x i32] + %v = musttail call i32 @direct_epilogue() + ret i32 %v +} + +define i32 @indirect_epilogue(ptr %p) { +; CHECK-LABEL: indirect_epilogue: +; CHECK: .functype indirect_epilogue (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push0=, __stack_pointer +; CHECK-NEXT: i32.const $push1=, 256 +; CHECK-NEXT: i32.sub $push5=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push4=, $1=, $pop5 +; CHECK-NEXT: global.set __stack_pointer, $pop4 +; CHECK-NEXT: i32.const $push2=, 256 +; CHECK-NEXT: i32.add $push3=, $1, $pop2 +; CHECK-NEXT: global.set __stack_pointer, $pop3 +; CHECK-NEXT: return_call_indirect , $0, $0 + %a = alloca [64 x i32] + %v = musttail call i32 %p(ptr %p) + ret i32 %v +} + ; Check that the signatures generated for external indirectly ; return-called functions include the proper return types @@ -252,6 +556,28 @@ define i32 @stack_arg_cast(i32 %x) { ; YAML-NEXT: ReturnTypes: ; YAML-NEXT: - I32 define i32 @unique_caller(ptr %p) { +; SLOW-LABEL: unique_caller: +; SLOW: .functype unique_caller (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push4=, 0 +; SLOW-NEXT: f32.const $push3=, 0x0p0 +; SLOW-NEXT: i64.const $push2=, 0 +; SLOW-NEXT: f64.const $push1=, 0x0p0 +; SLOW-NEXT: i32.load $push0=, 0($0) +; SLOW-NEXT: return_call_indirect , $pop4, $pop3, $pop2, $pop1, $pop0 +; +; FAST-LABEL: unique_caller: +; FAST: .functype unique_caller (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push1=, 0 +; FAST-NEXT: i32.const $push7=, 0 +; FAST-NEXT: f32.convert_i32_s $push2=, $pop7 +; FAST-NEXT: i64.const $push3=, 0 +; FAST-NEXT: i32.const $push6=, 0 +; FAST-NEXT: f64.convert_i32_s $push4=, $pop6 +; FAST-NEXT: i32.load $push5=, 0($0) +; FAST-NEXT: call_indirect $push0=, $pop1, $pop2, $pop3, $pop4, $pop5 # Invalid depth argument! +; FAST-NEXT: return $pop0 %f = load ptr, ptr %p %v = tail call i32 %f(i32 0, float 0., i64 0, double 0.) 
ret i32 %v diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll index 5e3bea0a83c24..da8e7b16a0cef 100644 --- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -53,7 +53,6 @@ define i32 @z() nounwind ssp { ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk ; CHECK-NEXT: calll ___stack_chk_fail -; CHECK-NEXT: ud2 entry: %retval = alloca i32 ; [#uses=2] %xxx = alloca %struct.X ; [#uses=6] diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 94df5fa6d96fc..dbe85eced6a59 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -213,10 +213,10 @@ define i64 @test8(i64 %0, i64 %1, i64 %2) { define i32 @smin(i32 %x) { ; CHECK-LABEL: smin: ; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi ; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: notl %edi ; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovsl %edi, %eax +; CHECK-NEXT: cmovnsl %edi, %eax ; CHECK-NEXT: retq %not_x = xor i32 %x, -1 %1 = icmp slt i32 %not_x, -1 diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll index fecfa95f8b838..aa870b7afbd38 100644 --- a/llvm/test/CodeGen/X86/pr45378.ll +++ b/llvm/test/CodeGen/X86/pr45378.ll @@ -15,7 +15,7 @@ define i1 @parseHeaders(ptr %ptr) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -45,7 +45,7 @@ define i1 @parseHeaders2_scalar_or(ptr %ptr) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/pr53419.ll b/llvm/test/CodeGen/X86/pr53419.ll index d92a7ceecec48..9455810fa2d78 100644 --- a/llvm/test/CodeGen/X86/pr53419.ll +++ b/llvm/test/CodeGen/X86/pr53419.ll @@ -13,32 +13,12 @@ declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>) ; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version. 
define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) { -; SSE-LABEL: intrinsic_v2i8: -; SSE: # %bb.0: # %bb -; SSE-NEXT: movzwl (%rdi), %eax -; SSE-NEXT: cmpw %ax, (%rsi) -; SSE-NEXT: sete %al -; SSE-NEXT: retq -; -; AVX-LABEL: intrinsic_v2i8: -; AVX: # %bb.0: # %bb -; AVX-NEXT: movzwl (%rdi), %eax -; AVX-NEXT: cmpw %ax, (%rsi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq -; -; AVX512-LABEL: intrinsic_v2i8: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: movzwl (%rsi), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 -; AVX512-NEXT: movzwl (%rdi), %eax -; AVX512-NEXT: vmovd %eax, %xmm1 -; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; AVX512-NEXT: knotw %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: testb $3, %al -; AVX512-NEXT: sete %al -; AVX512-NEXT: retq +; X64-LABEL: intrinsic_v2i8: +; X64: # %bb.0: # %bb +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: cmpw %ax, (%rsi) +; X64-NEXT: sete %al +; X64-NEXT: retq ; ; X86-LABEL: intrinsic_v2i8: ; X86: # %bb.0: # %bb @@ -57,30 +37,12 @@ bb: } define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) { -; SSE-LABEL: intrinsic_v4i8: -; SSE: # %bb.0: # %bb -; SSE-NEXT: movl (%rdi), %eax -; SSE-NEXT: cmpl %eax, (%rsi) -; SSE-NEXT: sete %al -; SSE-NEXT: retq -; -; AVX-LABEL: intrinsic_v4i8: -; AVX: # %bb.0: # %bb -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: cmpl %eax, (%rsi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq -; -; AVX512-LABEL: intrinsic_v4i8: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; AVX512-NEXT: knotw %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: testb $15, %al -; AVX512-NEXT: sete %al -; AVX512-NEXT: retq +; X64-LABEL: intrinsic_v4i8: +; X64: # %bb.0: # %bb +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: cmpl %eax, (%rsi) +; X64-NEXT: sete %al +; X64-NEXT: retq ; ; X86-LABEL: intrinsic_v4i8: ; X86: # %bb.0: # %bb @@ -99,28 +61,12 @@ bb: } define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) { -; SSE-LABEL: intrinsic_v8i8: -; SSE: # %bb.0: # %bb -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: cmpq %rax, (%rsi) -; SSE-NEXT: sete %al -; SSE-NEXT: retq -; -; AVX-LABEL: intrinsic_v8i8: -; AVX: # %bb.0: # %bb -; AVX-NEXT: movq (%rdi), %rax -; AVX-NEXT: cmpq %rax, (%rsi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq -; -; AVX512-LABEL: intrinsic_v8i8: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; AVX512-NEXT: kortestb %k0, %k0 -; AVX512-NEXT: setb %al -; AVX512-NEXT: retq +; X64-LABEL: intrinsic_v8i8: +; X64: # %bb.0: # %bb +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: cmpq %rax, (%rsi) +; X64-NEXT: sete %al +; X64-NEXT: retq ; ; X86-LABEL: intrinsic_v8i8: ; X86: # %bb.0: # %bb diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll index 066cbb6193317..5983d502af3dd 100644 --- a/llvm/test/CodeGen/X86/ptest.ll +++ b/llvm/test/CodeGen/X86/ptest.ll @@ -10,7 +10,7 @@ define i32 @veccond128(<4 x i32> %input) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: je .LBB0_2 ; SSE2-NEXT: # %bb.1: # %if-true-block ; SSE2-NEXT: xorl %eax, %eax @@ -57,7 +57,7 @@ define i32 @veccond256(<8 x i32> %input) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; 
SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: je .LBB1_2 ; SSE2-NEXT: # %bb.1: # %if-true-block ; SSE2-NEXT: xorl %eax, %eax @@ -109,7 +109,7 @@ define i32 @veccond512(<16 x i32> %input) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: je .LBB2_2 ; SSE2-NEXT: # %bb.1: # %if-true-block ; SSE2-NEXT: xorl %eax, %eax @@ -148,9 +148,8 @@ define i32 @veccond512(<16 x i32> %input) { ; ; AVX512-LABEL: veccond512: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: je .LBB2_2 ; AVX512-NEXT: # %bb.1: # %if-true-block ; AVX512-NEXT: xorl %eax, %eax @@ -177,7 +176,7 @@ define i32 @vectest128(<4 x i32> %input) { ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx ; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -208,7 +207,7 @@ define i32 @vectest256(<8 x i32> %input) { ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx ; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -243,7 +242,7 @@ define i32 @vectest512(<16 x i32> %input) { ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx ; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -268,10 +267,9 @@ define i32 @vectest512(<16 x i32> %input) { ; ; AVX512-LABEL: vectest512: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -288,7 +286,7 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: cmovel %esi, %eax ; SSE2-NEXT: retq ; @@ -319,7 +317,7 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: cmovel %esi, %eax ; SSE2-NEXT: retq ; @@ -354,7 +352,7 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: cmovel %esi, %eax ; SSE2-NEXT: retq ; @@ -380,9 +378,8 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: ; AVX512-NEXT: movl %edi, %eax -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; 
AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/select-constant-lea.ll b/llvm/test/CodeGen/X86/select-constant-lea.ll new file mode 100644 index 0000000000000..e8472053353cc --- /dev/null +++ b/llvm/test/CodeGen/X86/select-constant-lea.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=BASE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-3ops-lea | FileCheck %s --check-prefix=SLOWLEA3 + +define i32 @select_unsigned_lt_10_8_13j(i32 %0) { +; BASE-LABEL: select_unsigned_lt_10_8_13j: +; BASE: # %bb.0: +; BASE-NEXT: xorl %eax, %eax +; BASE-NEXT: cmpl $10, %edi +; BASE-NEXT: setae %al +; BASE-NEXT: leal (%rax,%rax,4), %eax +; BASE-NEXT: orl $8, %eax +; BASE-NEXT: retq +; +; SLOWLEA3-LABEL: select_unsigned_lt_10_8_13j: +; SLOWLEA3: # %bb.0: +; SLOWLEA3-NEXT: xorl %eax, %eax +; SLOWLEA3-NEXT: cmpl $10, %edi +; SLOWLEA3-NEXT: setae %al +; SLOWLEA3-NEXT: leal (%rax,%rax,4), %eax +; SLOWLEA3-NEXT: orl $8, %eax +; SLOWLEA3-NEXT: retq + %2 = icmp ult i32 %0, 10 + %3 = select i1 %2, i32 8, i32 13 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll index c2ee78989ba16..780a769bc9e2b 100644 --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -499,3 +499,519 @@ define double @ogt_no_zero(double %x) { %r = select i1 %cmp, double %x, double %neg ret double %r } + +define i64 @cmp_sgt_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_sgt_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: setg %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp sgt i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_sgt_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_sgt_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: setl %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp sgt i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_sgt_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: cmp_sgt_not_with_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 +; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp sgt <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_ugt_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_ugt_not: +; CHECK: # %bb.0: +; CHECK-NEXT: notq %rdi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq %rsi, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp ugt i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_ugt_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_ugt_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp ugt i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_ugt_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: cmp_ugt_not_with_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; 
SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: cmp_ugt_not_with_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: pminud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp ugt <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_sge_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_sge_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: setge %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp sge i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_sge_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_sge_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-42, %rdi +; CHECK-NEXT: setl %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp sge i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_sge_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: cmp_sge_not_with_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: cmp_sge_not_with_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: pmaxud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp uge <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_uge_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_uge_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp uge i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_uge_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_uge_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-42, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp uge i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_uge_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: cmp_uge_not_with_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: cmp_uge_not_with_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: pmaxud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp uge <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_sle_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_sle_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: setle %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp sle i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 
@cmp_sle_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_sle_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: setge %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp sle i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_sle_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: cmp_sle_not_with_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp sle <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_slt_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_slt_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: setl %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp slt i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_slt_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_slt_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-42, %rdi +; CHECK-NEXT: setge %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp slt i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_slt_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: cmp_slt_not_with_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: pcmpgtd %xmm1, %xmm0 +; CHECK-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp slt <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + + +define i64 @cmp_ult_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_ult_not: +; CHECK: # %bb.0: +; CHECK-NEXT: notq %rsi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq %rdi, %rsi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp ult i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_ult_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_ult_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addq $42, %rdi +; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp ult i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_ult_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: cmp_ult_not_with_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: cmp_ult_not_with_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: pmaxud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp ult <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_ule_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_ule_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp ule i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_ule_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_ule_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, 
%eax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp ule i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_ule_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: cmp_ule_not_with_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: cmp_ule_not_with_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: pminud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE41-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp ule <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_eq_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_eq_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: sete %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp eq i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_eq_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_eq_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: sete %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp eq i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_eq_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: cmp_eq_not_with_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp eq <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} +define i64 @cmp_ne_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_ne_not: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: setne %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp ne i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_ne_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_ne_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: setne %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp ne i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_ne_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: cmp_ne_not_with_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: retq + %na = xor <4 x i32> %a, + %nb = xor <4 x i32> %b, + %c = icmp ne <4 x i32> %na, %nb + %r = sext <4 x i1> %c to <4 x i32> + ret <4 x i32> %r +} + +define i64 @cmp_uge_not_commute(i64 %b, i64 %a) { +; CHECK-LABEL: cmp_uge_not_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp uge i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_ult_not_with_constant_commute(i64 %a) { +; CHECK-LABEL: cmp_ult_not_with_constant_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $43, %rdi +; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp ult i64 
42, %a + %r = sext i1 %c to i64 + ret i64 %r +} + +define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: cmp_uge_not_with_vec2xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] +; CHECK-NEXT: pxor %xmm2, %xmm1 +; CHECK-NEXT: pxor %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: pcmpgtd %xmm1, %xmm2 +; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] +; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; CHECK-NEXT: pand %xmm3, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: retq + %na = xor <2 x i64> %a, + %nb = xor <2 x i64> %b, + %c = icmp uge <2 x i64> %na, %nb + %r = sext <2 x i1> %c to <2 x i64> + ret <2 x i64> %r +} diff --git a/llvm/test/CodeGen/X86/stack-protector-weight.ll b/llvm/test/CodeGen/X86/stack-protector-weight.ll index 0b7620fdee657..862b130bfa4c6 100644 --- a/llvm/test/CodeGen/X86/stack-protector-weight.ll +++ b/llvm/test/CodeGen/X86/stack-protector-weight.ll @@ -10,7 +10,7 @@ ; DARWIN-SELDAG: bb.[[SUCCESS]]{{[0-9a-zA-Z_.]+}}: ; DARWIN-IR: # Machine code for function test_branch_weights: -; DARWIN-IR: successors: %bb.[[SUCCESS:[0-9]+]](0x7fffffff), %bb.[[FAILURE:[0-9]+]] +; DARWIN-IR: successors: %bb.[[SUCCESS:[0-9]+]](0x7ffff800), %bb.[[FAILURE:[0-9]+]] ; DARWIN-IR: bb.[[SUCCESS]]{{[0-9a-zA-Z_.]+}}: ; DARWIN-IR: bb.[[FAILURE]]{{[0-9a-zA-Z_.]+}}: ; DARWIN-IR: CALL64pcrel32 @__stack_chk_fail diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index fcb0ab6090398..f22d705068150 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -16,7 +16,7 @@ define i1 @test_v2i64(<2 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -43,7 +43,7 @@ define i1 @test_v4i64(<4 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -74,7 +74,7 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -105,9 +105,8 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; ; AVX512-LABEL: test_v8i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -129,7 +128,7 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -169,9 +168,8 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; AVX512-LABEL: test_v16i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -209,7 +207,7 @@ define i1 @test_v4i32(<4 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -236,7 +234,7 @@ define i1 @test_v8i32(<8 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -267,7 +265,7 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -298,9 +296,8 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; ; AVX512-LABEL: test_v16i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -322,7 +319,7 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -362,9 +359,8 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; AVX512-LABEL: test_v32i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -421,7 +417,7 @@ define i1 @test_v8i16(<8 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -448,7 +444,7 @@ define i1 @test_v16i16(<16 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -479,7 +475,7 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -510,9 +506,8 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; ; AVX512-LABEL: test_v32i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -534,7 +529,7 @@ define 
i1 @test_v64i16(<64 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -574,9 +569,8 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; AVX512-LABEL: test_v64i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -652,7 +646,7 @@ define i1 @test_v16i8(<16 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -679,7 +673,7 @@ define i1 @test_v32i8(<32 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -710,7 +704,7 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -741,9 +735,8 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; ; AVX512-LABEL: test_v64i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -765,7 +758,7 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -805,9 +798,8 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: test_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -871,10 +863,8 @@ define i1 @mask_v8i32(<8 x i32> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -921,7 +911,7 @@ define i1 @trunc_v16i16(<16 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -972,8 +962,7 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: psllw $7, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; 
SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: testl %eax, %eax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -1014,10 +1003,8 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: mask_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] -; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1036,7 +1023,7 @@ define zeroext i1 @PR44781(ptr %0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vselect-post-combine.ll b/llvm/test/CodeGen/X86/vselect-post-combine.ll new file mode 100644 index 0000000000000..fdbc361e85d22 --- /dev/null +++ b/llvm/test/CodeGen/X86/vselect-post-combine.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 + +define ptr @test_mul(ptr %addr) { +; AVX2-LABEL: test_mul: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [255,0,0,0] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpblendvb %xmm0, (%rdi), %xmm1, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: vmovdqu %ymm0, 0 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +entry: + %vec0 = load <32 x i8>, ptr %addr + %vec1 = shufflevector <32 x i8> %vec0, <32 x i8> , <32 x i32> + %0 = bitcast <32 x i8> %vec1 to <4 x i64> + %shuffle = shufflevector <4 x i64> %0, <4 x i64> zeroinitializer, <2 x i32> + %1 = bitcast <2 x i64> %shuffle to <16 x i8> + %conv = zext <16 x i8> %1 to <16 x i16> + store <16 x i16> %conv, ptr null, align 1 + ret ptr null +} diff --git a/llvm/test/Instrumentation/BoundsChecking/simple.ll b/llvm/test/Instrumentation/BoundsChecking/simple.ll index 57858618d17b3..e329b90d0cde4 100644 --- a/llvm/test/Instrumentation/BoundsChecking/simple.ll +++ b/llvm/test/Instrumentation/BoundsChecking/simple.ll @@ -33,7 +33,7 @@ define void @f2() nounwind { ; CHECK-NEXT: store i32 3, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @malloc(i64 32) @@ -57,7 +57,7 @@ define void @f3(i64 %x) nounwind { ; CHECK-NEXT: store i32 3, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @calloc(i64 4, i64 %x) @@ -93,7 +93,7 @@ define void @f4(i64 %x) nounwind { ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: 
call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @realloc(ptr null, i64 %x) nounwind @@ -115,7 +115,7 @@ define void @f5(i64 %x) nounwind { ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %idx = getelementptr inbounds [8 x i8], ptr @.str, i64 0, i64 %x @@ -137,7 +137,7 @@ define void @f5_as1(i64 %x) nounwind { ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %idx = getelementptr inbounds [8 x i8], ptr addrspace(1) @.str_as1, i64 0, i64 %x @@ -169,7 +169,7 @@ define void @f7(i64 %x) nounwind { ; CHECK-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP2]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = alloca i128, i64 %x @@ -222,7 +222,7 @@ define void @f10(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP6]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = alloca i128, i64 %x @@ -240,7 +240,7 @@ define void @f11(ptr byval(i128) %x) nounwind { ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = getelementptr inbounds i8, ptr %x, i64 16 @@ -256,7 +256,7 @@ define void @f11_as1(ptr addrspace(1) byval(i128) %x) nounwind { ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = getelementptr inbounds i8, ptr addrspace(1) %x, i16 16 @@ -282,7 +282,7 @@ define i64 @f12(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: ret i64 [[TMP12]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @calloc(i64 1, i64 %x) @@ -354,7 +354,7 @@ define i8 @f14(i1 %i) { ; CHECK-NEXT: [[RET:%.*]] = load i8, ptr [[P]], align 1 ; CHECK-NEXT: ret i8 [[RET]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; entry: @@ -396,7 +396,7 @@ define i8 @f15(i1 %i) { ; CHECK-NEXT: [[RET:%.*]] = load i8, ptr [[ALLOC]], align 1 ; CHECK-NEXT: ret i8 [[RET]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; entry: @@ -414,3 +414,53 @@ bb2: %ret = load i8, ptr %alloc ret i8 %ret } + +define <4 x i32> @load_vector(i64 %y) nounwind { +; CHECK-LABEL: @load_vector( +; CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @calloc(i64 1, i64 256) +; CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[Y:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 0, [[DOTIDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 256, [[TMP2]] +; CHECK-NEXT: 
[[TMP5:%.*]] = icmp ult i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP4]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TRAP:%.*]], label [[TMP8:%.*]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr [[TMP3]], align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP9]] +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] +; CHECK-NEXT: unreachable +; + %1 = tail call ptr @calloc(i64 1, i64 256) + %2 = getelementptr inbounds i64, ptr %1, i64 %y + %3 = load <4 x i32>, ptr %2, align 8 + ret <4 x i32> %3 +} + +define <vscale x 1 x i32> @load_scalable_vector(i64 %y) nounwind { +; CHECK-LABEL: @load_scalable_vector( +; CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @calloc(i64 1, i64 256) +; CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[Y:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 0, [[DOTIDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[TRAP:%.*]], label [[TMP10:%.*]] +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = load <vscale x 1 x i32>, ptr [[TMP3]], align 8 +; CHECK-NEXT: ret <vscale x 1 x i32> [[TMP11]] +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] +; CHECK-NEXT: unreachable +; + %1 = tail call ptr @calloc(i64 1, i64 256) + %2 = getelementptr inbounds i64, ptr %1, i64 %y + %3 = load <vscale x 1 x i32>, ptr %2, align 8 + ret <vscale x 1 x i32> %3 +} diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll b/llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll new file mode 100644 index 0000000000000..5312c7cc7336d --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll @@ -0,0 +1,272 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=hwasan -S | FileCheck %s + +target triple = "aarch64--linux-android10000" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @load.v1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + load <1 x i32>, ptr %p + ret void +} + +define void @load.v2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 3) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + load <2 x i32>, ptr %p + ret void +} + +define void @load.v4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 4) +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + load <4 x i32>, ptr %p + ret void +} + +define void @load.v8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL:
@load.v8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 32) +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + load <8 x i32>, ptr %p + ret void +} + +define void @load.v16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 64) +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + load <16 x i32>, ptr %p + ret void +} + + +define void @store.v1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 18) +; CHECK-NEXT: store <1 x i32> zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store <1 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 19) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + store <2 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 20) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + store <4 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 32) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + store <8 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 64) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + store <16 x i32> zeroinitializer, ptr %p + ret void +} + + +define void @load.nxv1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 4 +; 
CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 64 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 128 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 256 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 512 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + + +define void @store.nxv1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 64 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + store 
zeroinitializer, ptr %p + ret void +} + +define void @store.nxv4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 128 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 256 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 512 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll index 52c60e9b83b29..feb8a27fd5410 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll @@ -6,100 +6,210 @@ target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define void @load.v1i32(ptr %p) sanitize_address { +define void @load.v1i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v1i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0:![0-9]+]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load 
<1 x i32>, ptr [[TMP7]], align 4 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v1i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP4]], align 4 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 ; ORIGINS-NEXT: ret void ; load <1 x i32>, ptr %p ret void } -define void @load.v2i32(ptr %p) sanitize_address { +define void @load.v2i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v2i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v2i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 ; ORIGINS-NEXT: ret void ; load <2 x i32>, ptr %p ret void } -define void @load.v4i32(ptr %p) sanitize_address { +define void @load.v4i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v4i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 16 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; 
ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v4i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 16 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 16 ; ORIGINS-NEXT: ret void ; load <4 x i32>, ptr %p ret void } -define void @load.v8i32(ptr %p) sanitize_address { +define void @load.v8i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v8i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v8i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 32 ; ORIGINS-NEXT: ret void ; load <8 x i32>, ptr %p ret void } -define void @load.v16i32(ptr %p) sanitize_address { +define void @load.v16i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v16i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 64 ; 
CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v16i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 64 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 64 ; ORIGINS-NEXT: ret void ; load <16 x i32>, ptr %p @@ -107,7 +217,7 @@ define void @load.v16i32(ptr %p) sanitize_address { } -define void @store.v1i32(ptr %p) sanitize_address { +define void @store.v1i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v1i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -118,11 +228,18 @@ define void @store.v1i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <1 x i32> zeroinitializer, ptr [[TMP3]], align 4 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <1 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; ADDR-NEXT: store <1 x i32> zeroinitializer, ptr [[P]], align 4 ; ADDR-NEXT: ret void ; @@ -141,7 +258,7 @@ define void @store.v1i32(ptr %p) sanitize_address { ret void } -define void @store.v2i32(ptr %p) sanitize_address { +define void @store.v2i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v2i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -152,11 +269,18 @@ define void @store.v2i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: 
[[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP3]], align 8 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP6]], align 8 ; ADDR-NEXT: store <2 x i32> zeroinitializer, ptr [[P]], align 8 ; ADDR-NEXT: ret void ; @@ -175,7 +299,7 @@ define void @store.v2i32(ptr %p) sanitize_address { ret void } -define void @store.v4i32(ptr %p) sanitize_address { +define void @store.v4i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v4i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -186,11 +310,18 @@ define void @store.v4i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 16 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 16 ; ADDR-NEXT: store <4 x i32> zeroinitializer, ptr [[P]], align 16 ; ADDR-NEXT: ret void ; @@ -209,7 +340,7 @@ define void @store.v4i32(ptr %p) sanitize_address { ret void } -define void @store.v8i32(ptr %p) sanitize_address { +define void @store.v8i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v8i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -220,11 +351,18 @@ define void @store.v8i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP3]], align 32 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP6]], align 32 ; ADDR-NEXT: store <8 x i32> zeroinitializer, ptr [[P]], align 32 ; ADDR-NEXT: ret void ; @@ -243,7 +381,7 @@ define void @store.v8i32(ptr 
%p) sanitize_address { ret void } -define void @store.v16i32(ptr %p) sanitize_address { +define void @store.v16i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v16i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -254,11 +392,18 @@ define void @store.v16i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP3]], align 64 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP6]], align 64 ; ADDR-NEXT: store <16 x i32> zeroinitializer, ptr [[P]], align 64 ; ADDR-NEXT: ret void ; @@ -277,4 +422,513 @@ define void @store.v16i32(ptr %p) sanitize_address { ret void } +define void @load.nxv1i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv1i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 4 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 4 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 4 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv1i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 4 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 4 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv2i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv2i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 8 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 8 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 8 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv2i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 8 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv4i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv4i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 16 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 16 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv4i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 16 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv8i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv8i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 
+; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 32 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 32 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 32 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv8i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 32 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 32 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv16i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv16i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 64 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 64 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 64 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv16i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 64 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 64 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + + +define void @store.nxv1i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv1i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint 
ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 4 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 4 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv1i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv1i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0:![0-9]+]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 4 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 4 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} +define void @store.nxv2i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv2i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr 
[[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 8 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 8 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv2i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv2i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 8 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 8 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv4i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv4i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 16 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 16 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv4i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; 
ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 16 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 16 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv8i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv8i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 32 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 32 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv8i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 32 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, 
ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 32 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv16i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv16i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 64 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 64 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 64 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv16i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 64 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv16i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 64 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 64 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} diff --git a/llvm/test/MC/AArch64/SVE/pfalse.s b/llvm/test/MC/AArch64/SVE/pfalse.s index 7ac4d5c44f433..4124da8ac92a0 100644 --- a/llvm/test/MC/AArch64/SVE/pfalse.s +++ b/llvm/test/MC/AArch64/SVE/pfalse.s @@ -14,9 +14,3 @@ pfalse p15.b // CHECK-ENCODING: [0x0f,0xe4,0x18,0x25] // 
CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: 2518e40f - -pfalse pn15.b -// CHECK-INST: pfalse p15.b -// CHECK-ENCODING: [0x0f,0xe4,0x18,0x25] -// CHECK-ERROR: instruction requires: sve or sme -// CHECK-UNKNOWN: 2518e40f diff --git a/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s b/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s new file mode 100644 index 0000000000000..bca2cf913ff64 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s @@ -0,0 +1,50 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump --no-print-imm-hex -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN + + +ldr pn0, [x0] +// CHECK-INST: ldr p0, [x0] +// CHECK-ENCODING: [0x00,0x00,0x80,0x85] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 85800000 + +ldr pn5, [x10, #255, mul vl] +// CHECK-INST: ldr p5, [x10, #255, mul vl] +// CHECK-ENCODING: [0x45,0x1d,0x9f,0x85] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 859f1d45 + + +str pn0, [x0] +// CHECK-INST: str p0, [x0] +// CHECK-ENCODING: [0x00,0x00,0x80,0xe5] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: e5800000 + +str pn5, [x10, #255, mul vl] +// CHECK-INST: str p5, [x10, #255, mul vl] +// CHECK-ENCODING: [0x45,0x1d,0x9f,0xe5] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: e59f1d45 + + +mov pn0.b, pn0.b +// CHECK-INST: mov p0.b, p0.b +// CHECK-ENCODING: [0x00,0x40,0x80,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25804000 + + +pfalse pn15.b +// CHECK-INST: pfalse p15.b +// CHECK-ENCODING: [0x0f,0xe4,0x18,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 2518e40f diff --git a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s index dad4bc667853b..e95c9309a3d4d 100644 --- a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s +++ b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s @@ -96,45 +96,84 @@ // ALL-EMPTY: // ALL-EMPTY: - paciasp -// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] -// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] - autiasp -// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] -// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] - paciaz -// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] -// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] - autiaz -// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] -// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] + hint #7 + xpaclri +// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] +// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] +// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] +// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] + hint #8 pacia1716 // CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] +// CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] // 
NO83-NEXT: hint #8 // encoding: [0x1f,0x21,0x03,0xd5] +// NO83-NEXT: hint #8 // encoding: [0x1f,0x21,0x03,0xd5] + hint #10 + pacib1716 +// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] +// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] + hint #12 autia1716 // CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] +// CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] // NO83-NEXT: hint #12 // encoding: [0x9f,0x21,0x03,0xd5] - pacibsp -// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] -// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] - autibsp -// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] -// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] +// NO83-NEXT: hint #12 // encoding: [0x9f,0x21,0x03,0xd5] + hint #14 + autib1716 +// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] +// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] +// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] +// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] + hint #24 + paciaz +// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] +// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] + hint #25 + paciasp +// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] +// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] + hint #26 pacibz // CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] +// CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] // NO83-NEXT: hint #26 // encoding: [0x5f,0x23,0x03,0xd5] +// NO83-NEXT: hint #26 // encoding: [0x5f,0x23,0x03,0xd5] + hint #27 + pacibsp +// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] +// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] +// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] +// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] + hint #28 + autiaz +// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] +// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] +// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] +// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] + hint #29 + autiasp +// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] +// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] + hint #30 autibz // CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] +// CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] // NO83-NEXT: hint #30 // encoding: [0xdf,0x23,0x03,0xd5] - pacib1716 -// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] -// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] - autib1716 -// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] -// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] - xpaclri -// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] -// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] +// NO83-NEXT: hint #30 // encoding: [0xdf,0x23,0x03,0xd5] + hint #31 + autibsp +// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] +// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] +// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] +// NO83-NEXT: hint #31 // 
encoding: [0xff,0x23,0x03,0xd5] // ALL-EMPTY: pacia x0, x1 diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 7856dcf94cd1c..db1856e1c6677 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -1,7 +1,8 @@ ## Arch string without version. # RUN: llvm-mc %s -triple=riscv32 -filetype=asm | FileCheck %s -# RUN: llvm-mc %s -triple=riscv64 -filetype=asm | FileCheck %s +# RUN: llvm-mc %s -triple=riscv64 -filetype=asm \ +# RUN: | FileCheck --check-prefixes=CHECK,CHECK-RV64 %s .attribute arch, "rv32i" # CHECK: attribute 5, "rv32i2p0" @@ -12,6 +13,12 @@ .attribute arch, "rv32i2p0" # CHECK: attribute 5, "rv32i2p0" +.attribute arch, "rv32e" +# CHECK: attribute 5, "rv32e2p0" + +.attribute arch, "rv64e" +# CHECK-RV64: attribute 5, "rv64e2p0" + .attribute arch, "rv32i2_m2" # CHECK: attribute 5, "rv32i2p0_m2p0" diff --git a/llvm/test/MC/RISCV/elf-flags.s b/llvm/test/MC/RISCV/elf-flags.s index 543eadede1f70..546e129fb7194 100644 --- a/llvm/test/MC/RISCV/elf-flags.s +++ b/llvm/test/MC/RISCV/elf-flags.s @@ -5,6 +5,9 @@ # RUN: llvm-mc -triple=riscv32 -mattr=+e -filetype=obj < %s \ # RUN: | llvm-readobj --file-headers - \ # RUN: | FileCheck -check-prefix=CHECK-RVE %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e -filetype=obj < %s \ +# RUN: | llvm-readobj --file-headers - \ +# RUN: | FileCheck -check-prefix=CHECK-RVE %s # RUN: llvm-mc -triple=riscv32 -mattr=+experimental-ztso -filetype=obj < %s | llvm-readobj --file-headers - | FileCheck -check-prefixes=CHECK-TSO %s # RUN: llvm-mc -triple=riscv64 -mattr=+experimental-ztso -filetype=obj < %s | llvm-readobj --file-headers - | FileCheck -check-prefixes=CHECK-TSO %s diff --git a/llvm/test/MC/RISCV/invalid-attribute.s b/llvm/test/MC/RISCV/invalid-attribute.s index 761a98902d5ef..3514452997266 100644 --- a/llvm/test/MC/RISCV/invalid-attribute.s +++ b/llvm/test/MC/RISCV/invalid-attribute.s @@ -7,7 +7,7 @@ # RUN: not llvm-mc %s -triple=riscv64 -filetype=asm 2>&1 | FileCheck %s .attribute arch, "foo" -# CHECK: [[@LINE-1]]:18: error: invalid arch name 'foo', string must begin with rv32{i,e,g} or rv64{i,g} +# CHECK: [[@LINE-1]]:18: error: invalid arch name 'foo', string must begin with rv32{i,e,g} or rv64{i,e,g} .attribute arch, "rv32i2p0_y2p0" # CHECK: [[@LINE-1]]:18: error: invalid arch name 'rv32i2p0_y2p0', invalid standard user-level extension 'y' diff --git a/llvm/test/MC/RISCV/mattr-invalid-combination.s b/llvm/test/MC/RISCV/mattr-invalid-combination.s deleted file mode 100644 index f75fd3723ed49..0000000000000 --- a/llvm/test/MC/RISCV/mattr-invalid-combination.s +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: not --crash llvm-mc -triple riscv64 -mattr=+e < %s 2>&1 \ -# RUN: | FileCheck %s -check-prefix=RV64E - -# RV64E: LLVM ERROR: RV32E can't be enabled for an RV64 target diff --git a/llvm/test/MC/RISCV/rv32e-invalid.s b/llvm/test/MC/RISCV/rv32e-invalid.s index 760e7d49e5c46..9c19d3f40bcff 100644 --- a/llvm/test/MC/RISCV/rv32e-invalid.s +++ b/llvm/test/MC/RISCV/rv32e-invalid.s @@ -2,9 +2,13 @@ # RUN: llvm-mc -filetype=obj -triple=riscv32 < %s \ # RUN: | llvm-objdump --mattr=+e -M no-aliases -d -r - \ # RUN: | FileCheck -check-prefix=CHECK-DIS %s +# RUN: not llvm-mc -triple riscv64 -mattr=+e < %s 2>&1 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 < %s \ +# RUN: | llvm-objdump --mattr=+e -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefix=CHECK-DIS %s # Perform a simple check that registers x16-x31 (and the equivalent ABI names) -# are rejected for RV32E, when both assembling 
and disassembling. +# are rejected for RV32E/RV64E, when both assembling and disassembling. # CHECK-DIS: 37 18 00 00 diff --git a/llvm/test/MC/RISCV/rv32e-valid.s b/llvm/test/MC/RISCV/rv32e-valid.s index b0f435da50544..c2b77736d92b5 100644 --- a/llvm/test/MC/RISCV/rv32e-valid.s +++ b/llvm/test/MC/RISCV/rv32e-valid.s @@ -3,6 +3,11 @@ # RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+e < %s \ # RUN: | llvm-objdump -M no-aliases -d -r - \ # RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -mattr=+e -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+e < %s \ +# RUN: | llvm-objdump -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s # This file provides a basic test for RV32E, checking that the expected # set of registers and instructions are accepted. diff --git a/llvm/test/MC/RISCV/rv64e-valid.s b/llvm/test/MC/RISCV/rv64e-valid.s new file mode 100644 index 0000000000000..4780fd6ece4ab --- /dev/null +++ b/llvm/test/MC/RISCV/rv64e-valid.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -mattr=+e -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+e < %s \ +# RUN: | llvm-objdump -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# This file provides a basic test for RV64E, checking that the expected +# set of registers and instructions are accepted. It only tests instructions +# that are not valid in RV32E. + +# CHECK-ASM-AND-OBJ: ld a4, 25(a5) +ld x14, 25(x15) +# CHECK-ASM-AND-OBJ: sd a2, 36(a3) +sd a2, 36(a3) + +# CHECK-ASM-AND-OBJ: addiw a4, a5, 37 +addiw a4, a5, 37 +# CHECK-ASM-AND-OBJ: slliw t1, t1, 31 +slliw t1, t1, 31 +# CHECK-ASM-AND-OBJ: srliw a0, a4, 0 +srliw a0, a4, 0 +# CHECK-ASM-AND-OBJ: sraiw a1, sp, 15 +sraiw a1, sp, 15 +# CHECK-ASM-AND-OBJ: slliw t0, t1, 13 +slliw t0, t1, 13 + +# CHECK-ASM-AND-OBJ: addw ra, zero, zero +addw ra, zero, zero +# CHECK-ASM-AND-OBJ: subw t0, t2, t1 +subw t0, t2, t1 +# CHECK-ASM-AND-OBJ: sllw a5, a4, a3 +sllw a5, a4, a3 +# CHECK-ASM-AND-OBJ: srlw a0, s0, t0 +srlw a0, s0, t0 +# CHECK-ASM-AND-OBJ: sraw t0, a3, zero +sraw t0, a3, zero diff --git a/llvm/test/MC/RISCV/target-abi-invalid.s b/llvm/test/MC/RISCV/target-abi-invalid.s index 20e9f89153e05..d7dba182fd166 100644 --- a/llvm/test/MC/RISCV/target-abi-invalid.s +++ b/llvm/test/MC/RISCV/target-abi-invalid.s @@ -32,6 +32,8 @@ # RUN: | FileCheck -check-prefix=RV32EF-LP64F %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-LP64D %s +# RUN: llvm-mc -triple=riscv32 -mattr=+e -target-abi lp64e %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV32E-LP64E %s # RV32I-LP64: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32IF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) @@ -39,6 +41,7 @@ # RV32E-LP64: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EFD-LP64D: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) +# RV32E-LP64E: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RUN: llvm-mc -triple=riscv32 -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32I-ILP32F %s @@ -76,4 +79,18 @@ # RV32EFD-ILP32F: 
Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EFD-ILP32D: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) +# RUN: llvm-mc -triple=riscv64 -mattr=+e -target-abi lp64 < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EF-LP64F %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e,+f -target-abi lp64f < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EF-LP64F %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EFD-LP64F %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e,+d -target-abi lp64d < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EFD-LP64D %s + +# RV64E-LP64: Only the lp64e ABI is supported for RV64E (ignoring target-abi) +# RV64EF-LP64F: Only the lp64e ABI is supported for RV64E (ignoring target-abi) +# RV64EFD-LP64F: Only the lp64e ABI is supported for RV64E (ignoring target-abi) +# RV64EFD-LP64D: Only the lp64e ABI is supported for RV64E (ignoring target-abi) + nop diff --git a/llvm/test/MC/RISCV/target-abi-valid.s b/llvm/test/MC/RISCV/target-abi-valid.s index dab4420d0248c..63c0d4bf2e468 100644 --- a/llvm/test/MC/RISCV/target-abi-valid.s +++ b/llvm/test/MC/RISCV/target-abi-valid.s @@ -47,6 +47,10 @@ # RUN: | llvm-readobj --file-headers - \ # RUN: | FileCheck -check-prefix=CHECK-RVE %s +# RUN: llvm-mc -triple=riscv64 -target-abi lp64e -filetype=obj < %s \ +# RUN: | llvm-readobj --file-headers - \ +# RUN: | FileCheck -check-prefix=CHECK-RVE %s + # CHECK-NONE: Flags [ (0x0) # CHECK-NONE-NEXT: ] diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll new file mode 100644 index 0000000000000..d8c78d270f277 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -0,0 +1,157 @@ +;; Test callsite context graph generation for simple call graph with +;; two memprof contexts and no inlining. +;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +source_filename = "memprof-basic.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +define internal ptr @_Z3barv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() { +entry: + %call = call ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z3foov() { +entry: + %call = call ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 2, 3, 0 +; DUMP: AllocType 2 StackIds: 2, 3, 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAZ]] +; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO]] +; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"]; +; DOT: Node[[BAZ:0x[a-z0-9]+]] 
[shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"]; +; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"]; +; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: } diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll new file mode 100644 index 0000000000000..772b319e0715e --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll @@ -0,0 +1,229 @@ +;; Test callsite context graph generation for call graph with with MIBs +;; that have pruned contexts that partially match multiple inlined +;; callsite contexts, requiring duplication of context ids and nodes +;; while matching callsite nodes onto the graph. +;; +;; Original code looks like: +;; +;; char *D() { +;; return new char[10]; +;; } +;; +;; char *F() { +;; return D(); +;; } +;; +;; char *C() { +;; return D(); +;; } +;; +;; char *B() { +;; return C(); +;; } +;; +;; char *E() { +;; return C(); +;; } +;; int main(int argc, char **argv) { +;; char *x = B(); // cold +;; char *y = E(); // cold +;; char *z = F(); // default +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; delete[] z; +;; sleep(10); +;; delete[] x; +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of C into both B and E. +;; Since both allocation contexts via C are cold, the matched memprof +;; metadata has the context pruned above C's callsite. This requires +;; matching the stack node for C to callsites where it was inlined (i.e. +;; the callsites in B and E that have callsite metadata that includes C's). +;; It also requires duplication of that node in the graph as well as the +;; duplication of the context ids along that path through the graph, +;; so that we can represent the duplicated (via inlining) C callsite. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST + + +source_filename = "duplicate-context-ids.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z1Dv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z1Fv() { +entry: + %call = call ptr @_Z1Dv(), !callsite !6 + ret ptr null +} + +define internal ptr @_Z1Cv() { +entry: + %call = call ptr @_Z1Dv(), !callsite !7 + ret ptr null +} + +define internal ptr @_Z1Bv() { +entry: + %call.i = call ptr @_Z1Dv(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z1Ev() { +entry: + %call.i = call ptr @_Z1Dv(), !callsite !9 + ret ptr null +} + +declare i32 @main() + +declare void @_ZdaPv() + +declare i32 @sleep() + +!0 = !{!1, !3} +!1 = !{!2, !"cold"} +!2 = !{i64 6541423618768552252, i64 -6270142974039008131} +!3 = !{!4, !"notcold"} +!4 = !{i64 6541423618768552252, i64 -4903163940066524832} +!5 = !{i64 6541423618768552252} +!6 = !{i64 -4903163940066524832} +!7 = !{i64 -6270142974039008131} +!8 = !{i64 -6270142974039008131, i64 -184525619819294889} +!9 = !{i64 -6270142974039008131, i64 1905834578520680781} + + +;; After adding only the alloc node memprof metadata, we only have 2 contexts. + +; DUMP: CCG before updating call stack chains: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 + +; DUMP: Node [[C]] +; DUMP: null Call +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[F]] +; DUMP: null Call +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +;; After updating for callsite metadata, we should have generated context ids 3 and 4, +;; along with 2 new nodes for those callsites. All have the same allocation type +;; behavior as the original C node. 
+ +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 + +; DUMP: Node [[F]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[C2]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[E]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + + +; DOTPRE: digraph "prestackupdate" { +; DOTPRE: label="prestackupdate"; +; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; +; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"]; +; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; +; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"]; +; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; +; DOTPRE: } + + +; DOTPOST:digraph "postbuild" { +; DOTPOST: label="postbuild"; +; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; +; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"]; +; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; +; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"]; +; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"]; +; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"]; +; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 
4",fillcolor="cyan"]; +; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; +; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; +; DOTPOST:} diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll new file mode 100644 index 0000000000000..af7dece9421a9 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll @@ -0,0 +1,390 @@ +;; Test callsite context graph generation for call graph with with MIBs +;; that have pruned contexts that partially match multiple inlined +;; callsite contexts, requiring duplication of context ids and nodes +;; while matching callsite nodes onto the graph. This test requires more +;; complex duplication due to multiple contexts for different allocations +;; that share some of the same callsite nodes. +;; +;; Original code looks like: +;; +;; char *D(bool Call1) { +;; if (Call1) +;; return new char[10]; +;; else +;; return new char[10]; +;; } +;; +;; char *C(bool Call1) { +;; return D(Call1); +;; } +;; +;; char *B(bool Call1) { +;; if (Call1) +;; return C(true); +;; else +;; return C(false); +;; } +;; +;; char *A(bool Call1) { +;; return B(Call1); +;; } +;; +;; char *A1() { +;; return A(true); +;; } +;; +;; char *A2() { +;; return A(true); +;; } +;; +;; char *A3() { +;; return A(false); +;; } +;; +;; char *A4() { +;; return A(false); +;; } +;; +;; char *E() { +;; return B(true); +;; } +;; +;; char *F() { +;; return B(false); +;; } +;; +;; int main(int argc, char **argv) { +;; char *a1 = A1(); // cold +;; char *a2 = A2(); // cold +;; char *e = E(); // default +;; char *a3 = A3(); // default +;; char *a4 = A4(); // default +;; char *f = F(); // cold +;; memset(a1, 0, 10); +;; memset(a2, 0, 10); +;; memset(e, 0, 10); +;; memset(a3, 0, 10); +;; memset(a4, 0, 10); +;; memset(f, 0, 10); +;; delete[] a3; +;; delete[] a4; +;; delete[] e; +;; sleep(10); +;; delete[] a1; +;; delete[] a2; +;; delete[] f; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of A into its callers, +;; without any other inlining or optimizations. Since both allocation contexts +;; via A for each allocation in D have the same allocation type (cold via +;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second +;; new in D, the contexts for those respective allocations are pruned above A. +;; The allocations via E and F are to ensure we don't prune above B. +;; +;; The matching onto the inlined A[1234]->A sequences will require duplication +;; of the context id assigned to the context from A for each allocation in D. +;; This test ensures that we do this correctly in the presence of callsites +;; shared by the different duplicated context ids (i.e. callsite in C). +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
+ +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Z1Db,plx \ +; RUN: -r=%t.o,_Z1Cb,plx \ +; RUN: -r=%t.o,_Z1Bb,plx \ +; RUN: -r=%t.o,_Z1Ab,plx \ +; RUN: -r=%t.o,_Z2A1v,plx \ +; RUN: -r=%t.o,_Z2A2v,plx \ +; RUN: -r=%t.o,_Z2A3v,plx \ +; RUN: -r=%t.o,_Z2A4v,plx \ +; RUN: -r=%t.o,_Z1Ev,plx \ +; RUN: -r=%t.o,_Z1Fv,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z1Db(i1 %Call1) { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + br label %return + +if.else: ; No predecessors! + %call1 = call ptr @_Znam(i64 0), !memprof !6, !callsite !11 + br label %return + +return: ; preds = %if.else, %entry + ret ptr null +} + +declare ptr @_Znam(i64) + +define ptr @_Z1Cb(i1 %Call1) { +entry: + %call = call ptr @_Z1Db(i1 false), !callsite !12 + ret ptr null +} + +define ptr @_Z1Bb(i1 %Call1) { +entry: + %call = call ptr @_Z1Cb(i1 false), !callsite !13 + br label %return + +if.else: ; No predecessors! + %call1 = call ptr @_Z1Cb(i1 false), !callsite !14 + br label %return + +return: ; preds = %if.else, %entry + ret ptr null +} + +define ptr @_Z1Ab() { +entry: + %call = call ptr @_Z1Bb(i1 false), !callsite !15 + ret ptr null +} + +define ptr @_Z2A1v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !16 + ret ptr null +} + +define ptr @_Z2A2v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !17 + ret ptr null +} + +define ptr @_Z2A3v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !18 + ret ptr null +} + +define ptr @_Z2A4v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !19 + ret ptr null +} + +define ptr @_Z1Ev() { +entry: + %call = call ptr @_Z1Bb(i1 false), !callsite !20 + ret ptr null +} + +define ptr @_Z1Fv() { +entry: + %call = call ptr @_Z1Bb(i1 false), !callsite !21 + ret ptr null +} + +declare i32 @main() + +declare void @_ZdaPv() + +declare i32 @sleep() + +; uselistorder directives +uselistorder ptr @_Znam, { 1, 0 } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} +!3 = !{!4, !"cold"} +!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} +!5 = !{i64 4854880825882961848} +!6 = !{!7, !9} +!7 = !{!8, !"notcold"} +!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} +!9 = !{!10, !"cold"} +!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} +!11 = !{i64 -8775068539491628272} +!12 = !{i64 -904694911315397047} +!13 = !{i64 6532298921261778285} +!14 = !{i64 7859682663773658275} +!15 = !{i64 -6528110295079665978} +!16 = !{i64 -6528110295079665978, i64 5747919905719679568} +!17 = !{i64 -6528110295079665978, i64 -5753238080028016843} +!18 = !{i64 -6528110295079665978, i64 1794685869326395337} +!19 = !{i64 -6528110295079665978, i64 5462047985461644151} +!20 = !{i64 1905834578520680781} +!21 = !{i64 -4903163940066524832} + + +;; After adding only the alloc node memprof metadata, we only have 4 contexts (we 
only +;; match the interesting parts of the pre-update graph here). + +; DUMP: CCG before updating call stack chains: +; DUMP: Callsite Context Graph: + +; DUMP: Node [[D1:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 + +; DUMP: Node [[C:0x[a-z0-9]+]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 + +; DUMP: Node [[D2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 4, 3 +; DUMP: AllocType 2 StackIds: 0, 4, 5 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 + + +;; After updating for callsite metadata, we should have duplicated the context +;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A, +;; and used those on new nodes for those callers. Note that while in reality +;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4, +;; due to the pruning we have lost this information and thus end up duplicating +;; both of A's contexts to all of the new nodes (which could result in some +;; unnecessary cloning. + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D1]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 5 7 9 11 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 + +; DUMP: Node [[C]] +; DUMP: Callee: 11485875876353461977 (_Z1Db) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 +; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 +; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 + +; DUMP: Node [[B1]] +; DUMP: Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 5 7 9 11 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7 +; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9 +; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11 +; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[E]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to 
Caller: [[E]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[D2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 4, 3 +; DUMP: AllocType 2 StackIds: 0, 4, 5 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 + +; DUMP: Node [[B2]] +; DUMP: Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 4 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 + +; DUMP: Node [[F]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 5 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[A2]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 5 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[A3]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 8 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 7 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: CallerEdges: + +; DUMP: Node [[A1]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 9 10 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: CallerEdges: + +; DUMP: Node [[A4]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 9 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 11 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: CallerEdges: + +; DUMP: Node [[A]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: diff --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll new file mode 100644 index 
0000000000000..30c8bd27f37b7 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll @@ -0,0 +1,266 @@ +;; Tests callsite context graph generation for call graph containing indirect +;; calls. Currently this should result in conservative behavior, such that the +;; indirect call receives a null call in its graph node, to prevent subsequent +;; cloning. +;; +;; Original code looks like: +;; +;; char *foo() { +;; return new char[10]; +;; } +;; class A { +;; public: +;; virtual char *x() { return foo(); } +;; }; +;; class B : public A { +;; public: +;; char *x() final { return foo(); } +;; }; +;; char *bar(A *a) { +;; return a->x(); +;; } +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; B b; +;; char *z = bar(&b); +;; char *w = bar(&b); +;; A a; +;; char *r = bar(&a); +;; char *s = bar(&a); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; memset(w, 0, 10); +;; memset(r, 0, 10); +;; memset(s, 0, 10); +;; delete[] x; +;; delete[] w; +;; delete[] r; +;; sleep(10); +;; delete[] y; +;; delete[] z; +;; delete[] s; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Compiled without optimization to prevent inlining and devirtualization. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,_ZTVN10__cxxabiv120__si_class_type_infoE, \ +; RUN: -r=%t.o,_ZTVN10__cxxabiv117__class_type_infoE, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +source_filename = "indirectcall.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTVN10__cxxabiv120__si_class_type_infoE = external global ptr +@_ZTVN10__cxxabiv117__class_type_infoE = external global ptr + +define internal ptr @_Z3barP1A(ptr %a) { +entry: + ret ptr null +} + +define i32 @main() { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + %call2 = call ptr @_Z3barP1A(ptr null), !callsite !2 + %call3 = call ptr @_Z3barP1A(ptr null), !callsite !3 + %call4 = call ptr @_Z3barP1A(ptr null), !callsite !4 + %call5 = call ptr @_Z3barP1A(ptr null), !callsite !5 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +define internal ptr @_ZN1A1xEv() { +entry: + %call = call ptr @_Z3foov(), !callsite !6 + ret ptr null +} + +define internal ptr @_ZN1B1xEv() { +entry: + %call = call ptr @_Z3foov(), !callsite !7 + ret ptr null +} + +define internal ptr @_Z3foov() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !8, !callsite !21 + ret ptr null +} + +declare ptr @_Znam(i64) + +; uselistorder directives +uselistorder ptr @_Z3foov, { 3, 2, 1, 0 } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{i64 6792096022461663180} +!3 = !{i64 -2709642582978494015} +!4 = !{i64 748269490701775343} +!5 = !{i64 -5747251260480066785} +!6 = !{i64 8256774051149711748} +!7 = !{i64 -4831879094954754638} +!8 = !{!9, !11, !13, !15, !17, !19} +!9 = !{!10, !"notcold"} +!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343} +!11 = !{!12, !"cold"} +!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785} +!13 = !{!14, !"notcold"} +!14 = !{i64 2732490490862098848, i64 8632435727821051414} +!15 = !{!16, !"cold"} +!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180} +!17 = !{!18, !"notcold"} +!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015} +!19 = !{!20, !"cold"} +!20 = !{i64 2732490490862098848, i64 -3421689549917153178} +!21 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[FOO:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 6, 8, 4 +; DUMP: AllocType 2 StackIds: 6, 8, 5 +; DUMP: AllocType 1 StackIds: 0 +; DUMP: AllocType 2 StackIds: 7, 8, 2 +; DUMP: AllocType 1 StackIds: 7, 8, 3 +; DUMP: AllocType 2 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 + +; DUMP: Node [[AX]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 
1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; Bar contains an indirect call, with multiple targets. It's call should be null. +; DUMP: Node [[BAR]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 + +; DUMP: Node [[MAIN3]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[BX]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 + +; DUMP: Node [[MAIN5]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN6]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: CallerEdges: + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"]; +; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; +; DOT: Node[[AX]] -> 
Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; +; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; +; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; +; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; +; DOT: } diff --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll new file mode 100644 index 0000000000000..89cd878e99fb4 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll @@ -0,0 +1,186 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and partial inlining, requiring generation of a new fused node to +;; represent the inlined sequence while matching callsite nodes onto the graph. +;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of baz into foo, and +;; bar into baz. Due to the inlining of bar we will initially have two +;; allocation nodes in the graph. 
This tests that we correctly match +;; foo (with baz inlined) onto the graph nodes first, and generate a new +;; fused node for it. We should then not match baz (with bar inlined) as that +;; is not reached by the MIB contexts (since all calls from main will look +;; like main -> foo(+baz) -> bar after the inlining reflected in this IR). +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +source_filename = "inlined.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z3barv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() { +entry: + %call.i = call ptr @_Znam(i64 0), !memprof !0, !callsite !6 + ret ptr null +} + +define internal ptr @_Z3foov() { +entry: + %call.i = call ptr @_Z3barv(), !callsite !7 + ret ptr null +} + +define i32 @main() { +entry: + %call = call ptr @_Z3foov(), !callsite !8 + %call1 = call ptr @_Z3foov(), !callsite !9 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432} +!7 = !{i64 -5964873800580613432, i64 2732490490862098848} +!8 = !{i64 8632435727821051414} +!9 = !{i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: + +; DUMP: Node [[BAZ:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 1, 2 +; DUMP: AllocType 2 StackIds: 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; This is leftover from the MIB on the alloc inlined into baz. It is not +;; matched with any call, since there is no such node in the IR. Due to the +;; null call it will not participate in any context transformations. 
+; DUMP: Node [[FOO2]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO:0x[a-z0-9]+]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 3 4 + +;; This is the node synthesized for the call to bar in foo that was created +;; by inlining baz into foo. +; DUMP: Node [[FOO]] +; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 3 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"]; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; +; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 
Alloc2\n_Z3barv -\> alloc}"]; +; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; +; DOT: } diff --git a/llvm/test/ThinLTO/X86/memprof-inlined2.ll b/llvm/test/ThinLTO/X86/memprof-inlined2.ll new file mode 100644 index 0000000000000..1ffae8cd59cef --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-inlined2.ll @@ -0,0 +1,124 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and multiple levels of inlining, requiring generation of new +;; fused nodes to represent the inlined sequence while matching callsite +;; nodes onto the graph. In particular this tests the case where a function +;; has inlined a callee containing an inlined callee. +;; +;; Original code looks like: +;; +;; char *bar() __attribute__((noinline)) { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Both foo and baz are inlined into main, at both foo callsites. +;; We should update the graph for new fused nodes for both of those inlined +;; callsites to bar. +;; +;; Note that baz and bar are both dead due to the inlining, but have been left +;; in the input IR to ensure that the MIB call chain is matched to the longer +;; inline sequences from main. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
+ +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Z3barv,plx \ +; RUN: -r=%t.o,_Z3bazv,plx \ +; RUN: -r=%t.o,_Z3foov,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z3barv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +declare ptr @_Z3bazv() + +declare ptr @_Z3foov() + +define i32 @main() { +delete.end5: + %call.i.i = call ptr @_Z3barv(), !callsite !6 + %call.i.i8 = call ptr @_Z3barv(), !callsite !7 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} +!6 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!7 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +;; This is the node synthesized for the first inlined call chain of main->foo->baz +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 17377440600225628772 (_Z3barv) Clones: 0 StackIds: 0, 1, 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +;; This is the node synthesized for the second inlined call chain of main->foo->baz +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 17377440600225628772 (_Z3barv) Clones: 0 StackIds: 0, 1, 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: diff --git a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll new file mode 100644 index 0000000000000..7acc6109744ca --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll @@ -0,0 +1,182 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=aggressive-instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LE +; RUN: opt < %s -passes=aggressive-instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BE + + +@constarray1 = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4 +@constarray2 = internal constant [8 x i8] c"\FF\FF\01\00\01\00\01\00", align 4 + +@g = internal constant i32 42 +@constptrarray = internal constant [4 x ptr] 
[ptr @g, ptr @g, ptr @g, ptr @g], align 4 + +@constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 +@conststruct = internal constant {i16, [8 x i8]} {i16 1, [8 x i8] c"\01\00\01\00\01\00\01\00"}, align 4 + +define i8 @inbounds_gep_load_i8_align2(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_load_i8_align2( +; CHECK-NEXT: ret i8 1 +; + %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx + %2 = load i8, ptr %1, align 2 + ret i8 %2 +} + +; can't be folded because access with i8 strides is not patterned. +define i8 @inbounds_gep_load_i8_align1(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_load_i8_align1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx + %2 = load i8, ptr %1, align 1 + ret i8 %2 +} + +; can't be folded because volatile load cannot assure same results. +define i8 @inbounds_gep_load_i8_align2_volatile(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_load_i8_align2_volatile( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr [[TMP1]], align 2 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx + %2 = load volatile i8, ptr %1, align 2 + ret i8 %2 +} + +declare ptr @llvm.ptrmask.p0.i64(ptr , i64) + +; can't be folded because ptrmask can change ptr, while preserving provenance +define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){ +; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked( +; CHECK-NEXT: ret i8 1 +; + %1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask) + %2 = getelementptr inbounds i8, ptr %1, i64 %idx + %3 = load i8, ptr %2, align 2 + ret i8 %3 +} + +; TODO: this will be ret i32 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i16_load_i32_align1(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i16_load_i32_align1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i16, ptr @constarray1, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} + +; TODO: this will be ret i32 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_align8(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_align8( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr @constarray1, i64 %idx + %2 = load i32, ptr %1, align 8 + ret i32 %2 +} + +; TODO: this will be ret i32 65547(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_offset( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray2, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr inbounds i16, ptr @constarray2, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %idx + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +define i32 @gep_load_i32_align2_const_offset(i64 %idx){ +; LE-LABEL: @gep_load_i32_align2_const_offset( +; LE-NEXT: ret i32 
65537 +; +; BE-LABEL: @gep_load_i32_align2_const_offset( +; BE-NEXT: ret i32 16777472 +; + %1 = getelementptr i16, ptr @constarray1, i64 -2 + %2 = getelementptr [3 x i16], ptr %1, i64 %idx + %3 = load i32, ptr %2, align 2 + ret i32 %3 +} + +; can't be folded because if gep is non-inbounds, +; the offsets are silently-wrapped with two’s complement arithmetic(mod 2**64). +; So the load operand can be a base pointer of constarray2. +define i32 @gep_load_i32_align2_const_offset_wrap(i64 %idx){ +; CHECK-LABEL: @gep_load_i32_align2_const_offset_wrap( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr @constarray2, i64 -2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [3 x i16], ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 2 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr i16, ptr @constarray2, i64 -2 + %2 = getelementptr [3 x i16], ptr %1, i64 %idx + %3 = load i32, ptr %2, align 2 + ret i32 %3 +} + +; TODO: this will be ret i32 42 +define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_ptr_array( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr @constptrarray, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr inbounds ptr, ptr @constptrarray, i64 %idx + %2 = load ptr, ptr %1, align 4 + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +define i32 @inbounds_gep_i32_load_i32_align4_packedstruct(i64 %idx){ +; LE-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct( +; BE-NEXT: ret i32 16777472 +; + %1 = getelementptr inbounds i32, ptr @constpackedstruct, i64 %idx + %2 = load i32, ptr %1, align 4 + ret i32 %2 +} + +; can't be folded because results are not equal +define i32 @inbounds_gep_i8_load_i32_align1_packedstruct(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i8_load_i32_align1_packedstruct( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constpackedstruct, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constpackedstruct, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} + +; TODO: this coould be folded into 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){ +; LE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( +; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1 +; BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] +; BE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; BE-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr inbounds i16, ptr @conststruct, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %idx + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll new file mode 100644 index 0000000000000..4176a8a7bc5c8 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s 
-passes=argpromotion -S | FileCheck %s + +%ptr.struct = type { ptr, ptr, ptr } + +define internal void @child(ptr %this, ptr %y, ptr %x) { +; CHECK-LABEL: define internal void @child +; CHECK-SAME: (ptr [[Y:%.*]], half [[X_0_VAL:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: store half [[X_0_VAL]], ptr [[Y]], align 2 +; CHECK-NEXT: ret void +; +entry: + %0 = load half, ptr %x + store half %0, ptr %y + ret void +} + +define internal void @parent(ptr %this, ptr %p1, ptr %p2) { +; CHECK-LABEL: define internal void @parent +; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P2_VAL2:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call void @child(ptr [[P1]], half [[P2_VAL2]]) +; CHECK-NEXT: [[P2_VAL1:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call void @child(ptr [[P1]], half [[P2_VAL1]]) +; CHECK-NEXT: [[P2_VAL:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call void @child(ptr [[P1]], half [[P2_VAL]]) +; CHECK-NEXT: ret void +; +entry: + %src_element_op_0 = getelementptr ptr, ptr %this, i64 0 + %load0 = load ptr, ptr %src_element_op_0 + call void @child(ptr %load0, ptr %p1, ptr %p2) + %src_element_op_1 = getelementptr ptr, ptr %this, i64 1 + %load1 = load ptr, ptr %src_element_op_1 + call void @child(ptr %load1, ptr %p1, ptr %p2) + %src_element_op_2 = getelementptr ptr, ptr %this, i64 2 + %load2 = load ptr, ptr %src_element_op_2 + call void @child(ptr %load2, ptr %p1, ptr %p2) + ret void +} + +define void @grandparent() { +; CHECK-LABEL: define void @grandparent() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[XPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[YPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @parent(ptr [[XPTR]], ptr [[YPTR]]) +; CHECK-NEXT: ret void +; +entry: + %f = alloca %ptr.struct + %xptr = alloca i32 + %yptr = alloca i32 + call void @parent(ptr %f, ptr %xptr, ptr %yptr) + ret void +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll index 7d7099003dc77..be94af6a0bd03 100644 --- a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll +++ b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll @@ -83,7 +83,6 @@ define i32 @main() nounwind { ; CHECK-NEXT: call void @g(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] ; CHECK-NEXT: call void @h(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] ; CHECK-NEXT: call void @k(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] -; CHECK-NEXT: [[S_VAL:%.*]] = load ptr, ptr [[S]], align 8 ; CHECK-NEXT: call void @l() #[[ATTR0]] ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/GVN/range.ll b/llvm/test/Transforms/GVN/range.ll index ae0801ee59da1..48605aef0fe7d 100644 --- a/llvm/test/Transforms/GVN/range.ll +++ b/llvm/test/Transforms/GVN/range.ll @@ -17,7 +17,7 @@ define i32 @test1(ptr %p) { define i32 @test2(ptr %p) { ; CHECK-LABEL: define i32 @test2 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -30,7 +30,7 @@ define i32 @test2(ptr %p) { define i32 @test3(ptr %p) { ; CHECK-LABEL: define i32 @test3 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -43,7 +43,7 @@ define i32 @test3(ptr %p) { 
define i32 @test4(ptr %p) { ; CHECK-LABEL: define i32 @test4 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -56,7 +56,7 @@ define i32 @test4(ptr %p) { define i32 @test5(ptr %p) { ; CHECK-LABEL: define i32 @test5 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -69,7 +69,7 @@ define i32 @test5(ptr %p) { define i32 @test6(ptr %p) { ; CHECK-LABEL: define i32 @test6 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -82,7 +82,7 @@ define i32 @test6(ptr %p) { define i32 @test7(ptr %p) { ; CHECK-LABEL: define i32 @test7 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG5:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -95,7 +95,7 @@ define i32 @test7(ptr %p) { define i32 @test8(ptr %p) { ; CHECK-LABEL: define i32 @test8 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -105,6 +105,31 @@ define i32 @test8(ptr %p) { ret i32 %c } +define i32 @load_noundef_load(ptr %p) { +; CHECK-LABEL: define i32 @load_noundef_load +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef !6 +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; + %a = load i32, ptr %p, !range !0, !noundef !11 + %b = load i32, ptr %p, !range !1 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @load_load_noundef(ptr %p) { +; CHECK-LABEL: define i32 @load_load_noundef +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; + %a = load i32, ptr %p, !range !0 + %b = load i32, ptr %p, !range !1, !noundef !11 + %c = add i32 %a, %b + ret i32 %c +} !0 = !{i32 0, i32 2} !1 = !{i32 3, i32 5} @@ -117,10 +142,13 @@ define i32 @test8(ptr %p) { !8 = !{i32 5, i32 1} !9 = !{i32 1, i32 5} !10 = !{i32 5, i32 1} +!11 = !{} ;. ; CHECK: [[RNG0]] = !{i32 0, i32 2} -; CHECK: [[RNG1]] = !{i32 -5, i32 -2} -; CHECK: [[RNG2]] = !{i32 10, i32 1} -; CHECK: [[RNG3]] = !{i32 1, i32 2, i32 3, i32 4} -; CHECK: [[RNG4]] = !{i32 1, i32 5} +; CHECK: [[RNG1]] = !{i32 0, i32 2, i32 3, i32 5} +; CHECK: [[RNG2]] = !{i32 0, i32 5} +; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5} +; CHECK: [[RNG4]] = !{i32 10, i32 1} +; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2} +; CHECK: [[META6:![0-9]+]] = !{} ;. 
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index aa98896aac549..6f714153a598a 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -6,10 +6,9 @@ define i32 @bit_ceil_32(i32 %x) { ; CHECK-LABEL: @bit_ceil_32( ; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] -; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]] ; CHECK-NEXT: ret i32 [[SEL]] ; %dec = add i32 %x, -1 @@ -26,10 +25,9 @@ define i64 @bit_ceil_64(i64 %x) { ; CHECK-LABEL: @bit_ceil_64( ; CHECK-NEXT: [[DEC:%.*]] = add i64 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[DEC]], i1 false), !range [[RNG1:![0-9]+]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 64, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 1, [[SUB]] -; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i64 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i64 [[SHL]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i64 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 63 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i64 1, [[TMP2]] ; CHECK-NEXT: ret i64 [[SEL]] ; %dec = add i64 %x, -1 @@ -47,11 +45,9 @@ define i32 @bit_ceil_32_minus_1(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], -2 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], -3 -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[ADD]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -69,11 +65,9 @@ entry: define i32 @bit_ceil_32_plus_1(i32 %x) { ; CHECK-LABEL: @bit_ceil_32_plus_1( ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] -; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X]], -1 -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[DEC]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]] ; CHECK-NEXT: ret i32 [[SEL]] ; %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -85,15 +79,15 @@ define i32 @bit_ceil_32_plus_1(i32 %x) { ret i32 %sel } +; std::bit_ceil(x + 2) define i32 @bit_ceil_plus_2(i32 %x) { ; CHECK-LABEL: @bit_ceil_plus_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[X]], -2 -; CHECK-NEXT: 
[[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -112,11 +106,9 @@ define i32 @bit_ceil_32_neg(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[NOTSUB:%.*]] = add i32 [[X]], -1 -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[NOTSUB]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -136,10 +128,9 @@ define i32 @bit_ceil_not(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = sub i32 -2, [[X:%.*]] ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[X]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -152,5 +143,148 @@ entry: ret i32 %sel } +; Commuted select operands should still be recognized. +define i32 @bit_ceil_commuted_operands(i32 %x) { +; CHECK-LABEL: @bit_ceil_commuted_operands( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %eq = icmp eq i32 %dec, 0 + %sel = select i1 %eq, i32 1, i32 %shl + ret i32 %sel +} + +; Negative test: wrong select constant +define i32 @bit_ceil_wrong_select_constant(i32 %x) { +; CHECK-LABEL: @bit_ceil_wrong_select_constant( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT_INV:%.*]] = icmp ult i32 [[X]], 2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT_INV]], i32 2, i32 [[SHL]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 2 + ret i32 %sel +} + +; Negative test: select condition != false does not guarantee ctlz being either 0 or 32 +define i32 @bit_ceil_32_wrong_cond(i32 %x) { +; CHECK-LABEL: @bit_ceil_32_wrong_cond( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; 
CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 2 + %sel = select i1 %ugt, i32 %shl, i32 1 + ret i32 %sel +} + +; Negative test: wrong sub constant +define i32 @bit_ceil_wrong_sub_constant(i32 %x) { +; CHECK-LABEL: @bit_ceil_wrong_sub_constant( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 33, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 33, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 1 + ret i32 %sel +} + +; Negative test: the shl result used twice +define i32 @bit_ceil_32_shl_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_ceil_32_shl_used_twice( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: store i32 [[SHL]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 1 + store i32 %shl, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: the sub result used twice +define i32 @bit_ceil_32_sub_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_ceil_32_sub_used_twice( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: store i32 [[SUB]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 1 + store i32 %sub, ptr %p, align 4 + ret i32 %sel +} + +; a vector version of @bit_ceil_32 above +define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { +; CHECK-LABEL: @bit_ceil_v4i32( +; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[CTLZ:%.*]] = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> zeroinitializer, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], +; CHECK-NEXT: [[SEL:%.*]] = shl nuw <4 x i32> , [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[SEL]] +; + %dec = add <4 x i32> %x, + %ctlz = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %dec, i1 
false) + %sub = sub <4 x i32> , %ctlz + %shl = shl <4 x i32> , %sub + %ugt = icmp ugt <4 x i32> %x, + %sel = select <4 x i1> %ugt, <4 x i32> %shl, <4 x i32> + ret <4 x i32> %sel +} + declare i32 @llvm.ctlz.i32(i32, i1 immarg) declare i64 @llvm.ctlz.i64(i64, i1 immarg) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) diff --git a/llvm/test/Transforms/InstCombine/bit_floor.ll b/llvm/test/Transforms/InstCombine/bit_floor.ll index 0ef7fe3d22e0f..9daa8eee8969c 100644 --- a/llvm/test/Transforms/InstCombine/bit_floor.ll +++ b/llvm/test/Transforms/InstCombine/bit_floor.ll @@ -39,5 +39,134 @@ define i64 @bit_floor_64(i64 %x) { ret i64 %sel } +; Commutted select operands should still be recognized. +define i32 @bit_floor_commuted_operands(i32 %x) { +; CHECK-LABEL: @bit_floor_commuted_operands( +; CHECK-NEXT: [[NE0_NOT:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NE0_NOT]], i32 0, i32 [[SHL]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %ne0 = icmp ne i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %ne0, i32 %shl, i32 0 + ret i32 %sel +} + +; Negative test: lshr used twice +define i32 @bit_floor_lshr_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_lshr_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[LSHR]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %lshr, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: ctlz used twice +define i32 @bit_floor_ctlz_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_ctlz_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[CTLZ]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %ctlz, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: sub used twice +define i32 @bit_floor_sub_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_sub_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 
32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[SUB]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %sub, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: shl used twice +define i32 @bit_floor_shl_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_shl_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[SHL]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %shl, ptr %p, align 4 + ret i32 %sel +} + +; a vector version of @bit_floor_32 above +define <4 x i32> @bit_floor_v4i32(<4 x i32> %x) { +; CHECK-LABEL: @bit_floor_v4i32( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq <4 x i32> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X]], +; CHECK-NEXT: [[CTLZ:%.*]] = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> , [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> , [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[EQ0]], <4 x i32> zeroinitializer, <4 x i32> [[SHL]] +; CHECK-NEXT: ret <4 x i32> [[SEL]] +; + %eq0 = icmp eq <4 x i32> %x, + %lshr = lshr <4 x i32> %x, + %ctlz = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %lshr, i1 false) + %sub = sub <4 x i32> , %ctlz + %shl = shl <4 x i32> , %sub + %sel = select <4 x i1> %eq0, <4 x i32> , <4 x i32> %shl + ret <4 x i32> %sel +} + declare i32 @llvm.ctlz.i32(i32, i1 immarg) declare i64 @llvm.ctlz.i64(i64, i1 immarg) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 09dbff00f0055..bb70b4e0c1be2 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -541,8 +541,8 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { define i32 @partial_bswap(i32 %x) { ; CHECK-LABEL: @partial_bswap( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[R]] ; %x3 = shl i32 %x, 24 %a2 = shl i32 %x, 8 @@ -557,8 +557,8 @@ declare i32 @llvm.bswap.i32(i32) define <2 x i32> @partial_bswap_vector(<2 x i32> %x) { ; CHECK-LABEL: @partial_bswap_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[R]] ; %x3 = shl <2 x i32> %x, %a2 = shl <2 x i32> %x, @@ -929,3 +929,43 @@ define i32 @PR50910(i64 %t0) { %t6 = trunc i64 %t5 to i32 ret i32 %t6 } + +define i64 
@PR60690_call_fshl(i64 %result) { +; CHECK-LABEL: @PR60690_call_fshl( +; CHECK-NEXT: [[OR_I12:%.*]] = call i64 @llvm.bswap.i64(i64 [[RESULT:%.*]]) +; CHECK-NEXT: ret i64 [[OR_I12]] +; + %and.i = lshr i64 %result, 8 + %shr.i = and i64 %and.i, 71777214294589695 + %and1.i = shl i64 %result, 8 + %shl.i = and i64 %and1.i, -71777214294589696 + %or.i = or i64 %shr.i, %shl.i + %and.i7 = shl i64 %or.i, 16 + %shl.i8 = and i64 %and.i7, -281470681808896 + %and1.i9 = lshr i64 %or.i, 16 + %shr.i10 = and i64 %and1.i9, 281470681808895 + %or.i11 = or i64 %shl.i8, %shr.i10 + %or.i12 = tail call i64 @llvm.fshl.i64(i64 %or.i11, i64 %or.i11, i64 32) + ret i64 %or.i12 +} +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @PR60690_call_fshr(i64 %result) { +; CHECK-LABEL: @PR60690_call_fshr( +; CHECK-NEXT: [[OR_I12:%.*]] = call i64 @llvm.bswap.i64(i64 [[RESULT:%.*]]) +; CHECK-NEXT: ret i64 [[OR_I12]] +; + %and.i = lshr i64 %result, 8 + %shr.i = and i64 %and.i, 71777214294589695 + %and1.i = shl i64 %result, 8 + %shl.i = and i64 %and1.i, -71777214294589696 + %or.i = or i64 %shr.i, %shl.i + %and.i7 = shl i64 %or.i, 16 + %shl.i8 = and i64 %and.i7, -281470681808896 + %and1.i9 = lshr i64 %or.i, 16 + %shr.i10 = and i64 %and1.i9, 281470681808895 + %or.i11 = or i64 %shl.i8, %shr.i10 + %or.i12 = tail call i64 @llvm.fshr.i64(i64 %or.i11, i64 %or.i11, i64 32) + ret i64 %or.i12 +} +declare i64 @llvm.fshr.i64(i64, i64, i64) diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index 7d3f3948511d1..489f6e686680b 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -672,8 +672,9 @@ define i32 @fshl_mask_args_same1(i32 %a) { define i32 @fshl_mask_args_same2(i32 %a) { ; CHECK-LABEL: @fshl_mask_args_same2( -; CHECK-NEXT: [[T1:%.*]] = shl i32 [[A:%.*]], 8 -; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], 65280 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = shl i16 [[TRUNC]], 8 +; CHECK-NEXT: [[T2:%.*]] = zext i16 [[REV]] to i32 ; CHECK-NEXT: ret i32 [[T2]] ; %t1 = and i32 %a, 255 @@ -683,8 +684,8 @@ define i32 @fshl_mask_args_same2(i32 %a) { define i32 @fshl_mask_args_same3(i32 %a) { ; CHECK-LABEL: @fshl_mask_args_same3( -; CHECK-NEXT: [[T2:%.*]] = shl i32 [[A:%.*]], 24 -; CHECK-NEXT: ret i32 [[T2]] +; CHECK-NEXT: [[REV:%.*]] = shl i32 [[A:%.*]], 24 +; CHECK-NEXT: ret i32 [[REV]] ; %t1 = and i32 %a, 255 %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 24) diff --git a/llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll b/llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll deleted file mode 100644 index 82283648936cf..0000000000000 --- a/llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll +++ /dev/null @@ -1,134 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instsimplify -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -@constzeroarray = internal constant [4 x i32] zeroinitializer - -@constarray = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4 -@conststruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 - -define i32 @load_gep_const_zero_array(i64 %idx) { -; CHECK-LABEL: @load_gep_const_zero_array( -; CHECK-NEXT: ret i32 0 -; - %gep = getelementptr inbounds [4 x i32], ptr @constzeroarray, i64 0, i64 %idx - %load = load i32, ptr %gep - ret i32 
%load -} - -define i8 @load_i8_multi_gep_const_zero_array(i64 %idx1, i64 %idx2) { -; CHECK-LABEL: @load_i8_multi_gep_const_zero_array( -; CHECK-NEXT: ret i8 0 -; - %gep1 = getelementptr inbounds i8, ptr @constzeroarray, i64 %idx1 - %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 - %load = load i8, ptr %gep - ret i8 %load -} - - -define i32 @load_gep_const_patterned_array(i64 %idx) { -; CHECK-LABEL: @load_gep_const_patterned_array( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 [[IDX:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: ret i32 [[LOAD]] -; - %gep = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 %idx - %load = load i32, ptr %gep - ret i32 %load -} - -define i8 @load_i8_multi_gep_const_array(i64 %idx1, i64 %idx2) { -; CHECK-LABEL: @load_i8_multi_gep_const_array( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX1:%.*]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: ret i8 [[LOAD]] -; - %gep1 = getelementptr inbounds i8, ptr @constarray, i64 %idx1 - %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 - %load = load i8, ptr %gep - ret i8 %load -} - -; TODO: this should be ret i8 1 -define i8 @gep_load_i8_align2(i64 %idx){ -; CHECK-LABEL: @gep_load_i8_align2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2 -; CHECK-NEXT: ret i8 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i8, ptr %1, align 2 - ret i8 %2 -} - -; can't be folded -define i8 @gep_load_i8_align1(i64 %idx){ -; CHECK-LABEL: @gep_load_i8_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i8 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i8, ptr %1, align 1 - ret i8 %2 -} - -; TODO: this should be ret i8 65537 on the case for little endian -define i32 @gep_i32_load_i32_align4(i64 %idx){ -; CHECK-LABEL: @gep_i32_load_i32_align4( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i32, ptr @constarray, i64 %idx - %2 = load i32, ptr %1, align 4 - ret i32 %2 -} - -; TODO: this should be ret i8 65537 on the case for little endian -define i32 @gep_i32_load_i32_align4_struct(i64 %idx){ -; CHECK-LABEL: @gep_i32_load_i32_align4_struct( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @conststruct, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i32, ptr @conststruct, i64 %idx - %2 = load i32, ptr %1, align 4 - ret i32 %2 -} - -; can't be folded -define i32 @gep_i8_load_i32_align1(i64 %idx){ -; CHECK-LABEL: @gep_i8_load_i32_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i32, ptr %1, align 1 - ret i32 %2 -} - -; can't be folded -define i32 @gep_i8_load_i32_align1_struct(i64 %idx){ -; CHECK-LABEL: 
@gep_i8_load_i32_align1_struct( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @conststruct, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @conststruct, i64 %idx - %2 = load i32, ptr %1, align 1 - ret i32 %2 -} -; TODO: This could be folded but need to see GEP source types -define i32 @gep_i16_load_i32_align1(i64 %idx){ -; CHECK-LABEL: @gep_i16_load_i32_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i32 [[TMP2]] -; - %1 = getelementptr inbounds i16, ptr @constarray, i64 %idx - %2 = load i32, ptr %1, align 1 - ret i32 %2 -} diff --git a/llvm/test/Transforms/InstSimplify/load.ll b/llvm/test/Transforms/InstSimplify/load.ll index 2e2b1b14ddd0f..8b9a607f77b7d 100644 --- a/llvm/test/Transforms/InstSimplify/load.ll +++ b/llvm/test/Transforms/InstSimplify/load.ll @@ -3,6 +3,7 @@ @zeroinit = constant {} zeroinitializer @poison = constant {} poison +@constzeroarray = internal constant [4 x i32] zeroinitializer define i32 @crash_on_zeroinit() { ; CHECK-LABEL: @crash_on_zeroinit( @@ -40,3 +41,22 @@ define <3 x float> @load_vec3() { %1 = load <3 x float>, ptr getelementptr inbounds (<3 x float>, ptr @constvec, i64 1) ret <3 x float> %1 } + +define i32 @load_gep_const_zero_array(i64 %idx) { +; CHECK-LABEL: @load_gep_const_zero_array( +; CHECK-NEXT: ret i32 0 +; + %gep = getelementptr inbounds [4 x i32], ptr @constzeroarray, i64 0, i64 %idx + %load = load i32, ptr %gep + ret i32 %load +} + +define i8 @load_i8_multi_gep_const_zero_array(i64 %idx1, i64 %idx2) { +; CHECK-LABEL: @load_i8_multi_gep_const_zero_array( +; CHECK-NEXT: ret i8 0 +; + %gep1 = getelementptr inbounds i8, ptr @constzeroarray, i64 %idx1 + %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 + %load = load i8, ptr %gep + ret i8 %load +} diff --git a/llvm/test/Transforms/JumpThreading/thread-loads.ll b/llvm/test/Transforms/JumpThreading/thread-loads.ll index a730be9492c80..85952e8e6db4f 100644 --- a/llvm/test/Transforms/JumpThreading/thread-loads.ll +++ b/llvm/test/Transforms/JumpThreading/thread-loads.ll @@ -322,12 +322,12 @@ bb3: define void @test8(ptr, ptr, ptr) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: ret2: -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !range [[RNG4:![0-9]+]], !noundef !5 ; CHECK-NEXT: store i32 [[A]], ptr [[TMP1:%.*]], align 4 ; CHECK-NEXT: [[XXX:%.*]] = tail call i32 (...) @f1() #[[ATTR0]] ; CHECK-NEXT: ret void ; - %a = load i32, ptr %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10 + %a = load i32, ptr %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10, !noundef !11 %b = load i32, ptr %0, !range !5 store i32 %a, ptr %1 %c = icmp eq i32 %b, 8 @@ -693,3 +693,4 @@ right_x: !8 = !{!8, !6} !9 = !{!7} !10 = !{!8} +!11 = !{} diff --git a/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll new file mode 100644 index 0000000000000..55a2a2970d2d7 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll @@ -0,0 +1,25 @@ +;; Tag this 'XFAIL' because we need a few more TTIs and ISels. 
+; XFAIL: * +; RUN: opt --mtriple=loongarch64 --passes=loop-data-prefetch -loongarch-enable-loop-data-prefetch -S < %s | FileCheck %s + +define void @foo(ptr %a, ptr %b) { +entry: + br label %for.body + +; CHECK: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv +; CHECK: call void @llvm.prefetch + %0 = load double, ptr %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv + store double %add, ptr %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: for.end: +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000..2b5a4893e686f --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll b/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll index dd5031c56722f..2c2c88b6acb20 100644 --- a/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll +++ b/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll @@ -22,7 +22,7 @@ ; CHECK-SCEV: %narrow = trunc i32 %wide to i16 ; CHECK-SCEV: --> (trunc i32 %wide to i16) U: full-set S: full-set Exits: <> LoopDispositions: { %loop.outer.header: Variant, %loop.inner: Invariant } ; CHECK-SCEV: %iv = phi i16 [ %narrow, %loop.inner.ph ], [ %iv.plus, %loop.inner ] -; CHECK-SCEV: --> {(trunc i32 %wide to i16),+,1}<%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.outer.header: Variant } +; CHECK-SCEV: --> {(trunc i32 %wide to i16),+,1}<%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.outer.header: Variant } ; ; CHECK-SCEV: Classifying expressions for: @test_function ; CHECK-SCEV: %wide1 = load i32, ptr @offset, align 1 @@ -32,7 +32,7 @@ ; CHECK-SCEV: %narrow = trunc i32 %wide2 to i16 ; CHECK-SCEV: --> (trunc i32 %wide2 to i16) U: full-set S: full-set Exits: <> LoopDispositions: { %loop.inner.ph: Variant, %loop.inner: Invariant } ; CHECK-SCEV: %iv = phi i16 [ %narrow, %loop.inner.ph ], [ %iv.plus, %loop.inner ] -; CHECK-SCEV: --> {(trunc i32 %wide2 to i16),+,1}<%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide2 to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.inner.ph: Variant } +; CHECK-SCEV: --> {(trunc i32 %wide2 to i16),+,1}<%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide2 to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.inner.ph: Variant } @offset = external dso_local global i32, align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index d58ff5051c621..4b93ea30cf252 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -14,27 +14,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-LABEL: @vector_add( ; CHECK-NEXT: entry: ; 
CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -49,7 +52,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -74,32 +77,35 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-LABEL: @vector_add_reduce( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 
@llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5]] = add [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7]] = add [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP5]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -111,7 +117,7 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: 
[[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 7d079d13dc710..119f50df5b8e3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -11,27 +11,30 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @vector_udiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -54,26 +57,26 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: entry: ; FIXED-NEXT: br i1 
false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXED: vector.ph: -; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; FIXED-NEXT: [[TMP7:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] -; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; FIXED-NEXT: [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] +; FIXED-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXED: middle.block: @@ -115,27 +118,30 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @vector_sdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = 
call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -158,26 +164,26 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: entry: ; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXED: vector.ph: -; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: br label 
[[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; FIXED-NEXT: [[TMP7:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] -; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; FIXED-NEXT: [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] +; FIXED-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXED: middle.block: @@ -219,27 +225,30 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @vector_urem( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; 
CHECK-NEXT: [[TMP5:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -262,26 +271,26 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: entry: ; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXED: vector.ph: -; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = urem <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; FIXED-NEXT: [[TMP7:%.*]] = urem <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] -; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8 
-; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = urem <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; FIXED-NEXT: [[TMP7:%.*]] = urem <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] +; FIXED-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXED: middle.block: @@ -323,27 +332,30 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @vector_srem( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = srem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = srem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -366,26 +378,26 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: entry: ; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXED: vector.ph: -; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = srem <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; FIXED-NEXT: [[TMP7:%.*]] = srem <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] -; FIXED-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = srem <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; FIXED-NEXT: [[TMP7:%.*]] = srem <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]] +; FIXED-NEXT: store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FIXED: middle.block: @@ -427,31 +439,34 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @predicated_udiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = select [[TMP5]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = udiv [[WIDE_LOAD]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP5]], [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 
[[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -480,34 +495,34 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: entry: ; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXED: vector.ph: -; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT3]], zeroinitializer -; FIXED-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> -; FIXED-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[BROADCAST_SPLAT3]], <2 x i64> -; FIXED-NEXT: [[TMP10:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], [[TMP8]] -; FIXED-NEXT: [[TMP11:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], [[TMP9]] -; FIXED-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP13:%.*]] = xor <2 x i1> [[TMP7]], -; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP10]], <2 x i64> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[WIDE_LOAD1]] -; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT3]], zeroinitializer +; FIXED-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> +; FIXED-NEXT: 
[[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT3]], <4 x i64> +; FIXED-NEXT: [[TMP10:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP8]] +; FIXED-NEXT: [[TMP11:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP9]] +; FIXED-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP6]], +; FIXED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP7]], +; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[WIDE_LOAD]] +; FIXED-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXED: middle.block: @@ -561,31 +576,34 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @predicated_sdiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = select [[TMP5]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = sdiv [[WIDE_LOAD]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP5]], [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -614,34 +632,34 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: entry: ; FIXED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXED: vector.ph: -; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT3]], zeroinitializer -; FIXED-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> -; FIXED-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[BROADCAST_SPLAT3]], <2 x i64> -; FIXED-NEXT: 
[[TMP10:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], [[TMP8]] -; FIXED-NEXT: [[TMP11:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], [[TMP9]] -; FIXED-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP13:%.*]] = xor <2 x i1> [[TMP7]], -; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP10]], <2 x i64> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[WIDE_LOAD1]] -; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT3]], zeroinitializer +; FIXED-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> +; FIXED-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT3]], <4 x i64> +; FIXED-NEXT: [[TMP10:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP8]] +; FIXED-NEXT: [[TMP11:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP9]] +; FIXED-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP6]], +; FIXED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP7]], +; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[WIDE_LOAD]] +; FIXED-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; FIXED: middle.block: @@ -695,28 +713,31 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-LABEL: @predicated_udiv_by_constant( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP6:%.*]] = udiv 
[[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = xor [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP5]], [[TMP6]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = udiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -749,24 +770,24 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP8:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP9:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP7]], -; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP9]], <2 x i64> [[WIDE_LOAD1]] -; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], +; FIXED-NEXT: [[TMP9:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], +; FIXED-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXED: middle.block: @@ -820,28 +841,31 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-LABEL: @predicated_sdiv_by_constant( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP6:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = xor [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP5]], [[TMP6]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -874,24 +898,24 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP8:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP9:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP7]], -; FIXED-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP9]], <2 x i64> [[WIDE_LOAD1]] -; FIXED-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 -; FIXED-NEXT: store <2 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], +; FIXED-NEXT: [[TMP9:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], +; FIXED-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], +; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 +; FIXED-NEXT: store 
<4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; FIXED: middle.block: @@ -945,12 +969,12 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-LABEL: @predicated_sdiv_by_minus_one( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -959,15 +983,15 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -1003,26 +1027,26 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 
-; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 +; FIXED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 32 ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]] ; FIXED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]] ; FIXED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 -; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16 -; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 -; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <16 x i8> [[WIDE_LOAD]], -; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <16 x i8> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> , <16 x i8> -; FIXED-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> , <16 x i8> -; FIXED-NEXT: [[TMP10:%.*]] = sdiv <16 x i8> [[WIDE_LOAD]], [[TMP8]] -; FIXED-NEXT: [[TMP11:%.*]] = sdiv <16 x i8> [[WIDE_LOAD1]], [[TMP9]] -; FIXED-NEXT: [[TMP12:%.*]] = xor <16 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP13:%.*]] = xor <16 x i1> [[TMP7]], -; FIXED-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP10]], <16 x i8> [[WIDE_LOAD]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[WIDE_LOAD1]] -; FIXED-NEXT: store <16 x i8> [[PREDPHI]], ptr [[TMP4]], align 1 -; FIXED-NEXT: store <16 x i8> [[PREDPHI2]], ptr [[TMP5]], align 1 -; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; FIXED-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 +; FIXED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 32 +; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 +; FIXED-NEXT: [[TMP6:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], +; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], +; FIXED-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> , <32 x i8> +; FIXED-NEXT: [[TMP9:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> , <32 x i8> +; FIXED-NEXT: [[TMP10:%.*]] = sdiv <32 x i8> [[WIDE_LOAD]], [[TMP8]] +; FIXED-NEXT: [[TMP11:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP9]] +; FIXED-NEXT: [[TMP12:%.*]] = xor <32 x i1> [[TMP6]], +; FIXED-NEXT: [[TMP13:%.*]] = xor <32 x i1> [[TMP7]], +; FIXED-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP10]], <32 x i8> [[WIDE_LOAD]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[WIDE_LOAD1]] +; FIXED-NEXT: store <32 x i8> [[PREDPHI]], ptr [[TMP4]], align 1 +; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP5]], align 1 +; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; FIXED: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll index 56c8f901a668d..0f7600e9b2235 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll @@ -102,31 +102,31 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) { ; CHECK-LABEL: @uniform_store_i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 64 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[START]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x ptr> poison, ptr [[START]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT2]], <16 x ptr> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x ptr> poison, ptr [[START]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x ptr> [[BROADCAST_SPLATINSERT]], <32 x ptr> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <32 x ptr> poison, ptr [[START]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <32 x ptr> [[BROADCAST_SPLATINSERT2]], <32 x ptr> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, <16 x ptr> [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, <16 x ptr> [[TMP3]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x ptr> [[TMP4]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <16 x ptr> [[TMP5]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x ptr> [[TMP4]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <32 x ptr> [[TMP5]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i1> [[TMP7]], i32 31 ; CHECK-NEXT: store i1 [[TMP8]], ptr [[DST:%.*]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 256 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 512 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 2eb8ac4086f78..1310ed3618b2c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ 
b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -13,31 +13,31 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; OUTLOOP: for.body.preheader: ; OUTLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2 +; OUTLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 ; OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]] ; OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; OUTLOOP: vector.ph: ; OUTLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2 +; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; OUTLOOP: vector.body: ; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; OUTLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; OUTLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]] ; OUTLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; OUTLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; OUTLOOP-NEXT: [[TMP8]] = add [[VEC_PHI]], [[TMP7]] +; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 +; OUTLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to +; OUTLOOP-NEXT: [[TMP8]] = add [[VEC_PHI]], [[TMP7]] ; OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 2 +; OUTLOOP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP10]] ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OUTLOOP: middle.block: -; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32( [[TMP8]]) +; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP8]]) ; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; OUTLOOP: scalar.ph: @@ -67,12 +67,12 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; INLOOP: for.body.preheader: ; INLOOP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 +; INLOOP-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8 ; INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]] ; INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INLOOP: vector.ph: ; INLOOP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 +; INLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] @@ -82,12 +82,12 @@ define i32 
@add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; INLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]] ; INLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; INLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; INLOOP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP7]]) +; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 +; INLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to +; INLOOP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP7]]) ; INLOOP-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]] ; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4 +; INLOOP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 8 ; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP11]] ; INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index b81d14c520770..827131ed19117 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -13,18 +13,18 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC]], +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[STRIDED_VEC1]], +; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP7]], <8 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -125,46 +125,46 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-LABEL: @load_store_factor3_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = mul [[VEC_IND]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP10]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0( [[TMP12]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P]], [[TMP13]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void 
@llvm.masked.scatter.nxv2i32.nxv2p0( [[TMP15]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], [[TMP16]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0( [[TMP18]], [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = mul [[VEC_IND]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP10]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP12]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P]], [[TMP13]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], [[TMP16]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -395,7 +395,7 @@ exit: ret void } -define void @combine_load_factor2_i32(ptr %p) { +define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-LABEL: @combine_load_factor2_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -403,30 +403,31 @@ define void @combine_load_factor2_i32(ptr %p) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], <4 x i64> [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], <4 x i64> [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[STRIDED_VEC4]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC5]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP2]], i32 4, <4 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP3]], i32 4, <4 x i1> ) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x 
i32> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]] +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 8 +; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -442,7 +443,8 @@ define void @combine_load_factor2_i32(ptr %p) { ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] ; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]] -; CHECK-NEXT: store i32 [[RES]], ptr [[Q0]], align 4 +; CHECK-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]] +; CHECK-NEXT: store i32 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] @@ -464,7 +466,8 @@ loop: %res = add i32 %x0, %x1 - store i32 %res, ptr %q0 + %dst = getelementptr i32, ptr %q, i64 %i + store i32 %res, ptr %dst %nexti = add i64 %i, 1 %done = icmp eq i64 %nexti, 1024 @@ -473,23 +476,48 @@ exit: ret void } -define void @combine_load_factor2_i64(ptr %p) { +define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-LABEL: @combine_load_factor2_i64( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N]], label 
[[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]] +; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 4 ; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] ; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; CHECK-NEXT: store i64 [[RES]], ptr [[Q0]], align 4 +; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]] +; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -508,7 +536,8 @@ loop: %res = add i64 %x0, %x1 - store i64 %res, ptr %q0 + %dst = getelementptr i64, ptr %q, i64 %i + store i64 %res, ptr %dst %nexti = add i64 %i, 1 %done = icmp eq i64 %nexti, 1024 @@ -516,3 +545,4 @@ loop: exit: ret void } + diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index c456e0e1df7e8..9b3b90a7bc3b6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=LMUL1 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=1 | FileCheck %s -check-prefix=LMUL1 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=2 | FileCheck %s -check-prefix=LMUL2 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=4 | FileCheck %s -check-prefix=LMUL4 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=8 | FileCheck %s -check-prefix=LMUL8 +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=LMUL2 define void @load_store(ptr %p) { ; LMUL1-LABEL: @load_store( diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index dc4522756c369..ace267d72dea0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -9,9 +9,9 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] @@ -20,18 +20,18 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP5]], i64 5) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP5]], i64 5) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8i8.p0(ptr [[TMP7]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP7]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv8i8.p0(ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP8]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP11]], ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP8]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP11]], ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 3d580a5671783..34b06972dab06 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -12,44 +12,44 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-LABEL: @test( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: ; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; 
VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; VLENUNK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; VLENUNK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; VLENUNK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; VLENUNK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] ; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) ; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] ; VLENUNK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 -; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0(ptr [[TMP13]], i32 4, [[TMP11]], poison) -; VLENUNK-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[WIDE_MASKED_LOAD]] -; VLENUNK-NEXT: [[TMP15:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, [[TMP11]], poison) +; VLENUNK-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[WIDE_MASKED_LOAD]] +; VLENUNK-NEXT: [[TMP15:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP10]] ; VLENUNK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], 
i32 0 -; VLENUNK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 +; VLENUNK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 ; VLENUNK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index ac56579af2d26..00cabd58de913 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -17,8 +17,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-LABEL: @foo4( ; RV32-NEXT: entry: ; RV32-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 12, i64 [[TMP0]]) -; RV32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP1]] +; RV32-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; RV32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP2]] ; RV32-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV32: vector.memcheck: ; RV32-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 @@ -33,37 +34,40 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: -; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP2]] +; RV32-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV32-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; RV32-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; RV32-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; RV32-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP7:%.*]] = mul i64 16, [[TMP6]] -; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; RV32-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV32-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; RV32-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; RV32-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; RV32-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement 
poison, i64 [[TMP10]], i64 0 +; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i32.nxv1p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV32-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP10]] -; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv1f64.nxv1p0( [[TMP11]], i32 8, [[TMP9]], poison), !alias.scope !3 -; RV32-NEXT: [[TMP12:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV32-NEXT: [[TMP13:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP12]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV32-NEXT: call void @llvm.masked.scatter.nxv1f64.nxv1p0( [[TMP13]], [[TMP14]], i32 8, [[TMP9]]), !alias.scope !5, !noalias !7 -; RV32-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] -; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; RV32-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; RV32-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 +; RV32-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] +; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 +; RV32-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV32-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] +; RV32-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 +; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP8:![0-9]+]] ; RV32: middle.block: ; RV32-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV32-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -73,30 +77,31 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32: for.body: ; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] -; RV32-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP17]], 100 +; RV32-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100 ; RV32-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; RV32: if.then: -; RV32-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 -; RV32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP18]] -; RV32-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 -; RV32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double -; RV32-NEXT: [[ADD:%.*]] = fadd double [[TMP19]], [[CONV]] +; RV32-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; RV32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP22]] +; RV32-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 +; RV32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double +; RV32-NEXT: [[ADD:%.*]] = fadd double [[TMP23]], [[CONV]] ; RV32-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] ; RV32-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 ; RV32-NEXT: br label [[FOR_INC]] ; RV32: for.inc: ; RV32-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 ; RV32-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 -; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]] +; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]] ; RV32: for.end: ; RV32-NEXT: ret void ; ; RV64-LABEL: @foo4( ; RV64-NEXT: entry: ; RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 12, i64 [[TMP0]]) -; RV64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP1]] +; RV64-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; RV64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP2]] ; RV64-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV64: vector.memcheck: ; RV64-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 @@ -111,37 +116,40 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV64: vector.ph: -; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP2]] +; RV64-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV64-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; RV64-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; RV64-NEXT: 
[[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; RV64-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP7:%.*]] = mul i64 16, [[TMP6]] -; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; RV64-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV64-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; RV64-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; RV64-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; RV64-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i32.nxv1p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV64-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP10]] -; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv1f64.nxv1p0( [[TMP11]], i32 8, [[TMP9]], poison), !alias.scope !3 -; RV64-NEXT: [[TMP12:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV64-NEXT: [[TMP13:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP12]] -; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV64-NEXT: call void @llvm.masked.scatter.nxv1f64.nxv1p0( [[TMP13]], [[TMP14]], i32 8, [[TMP9]]), !alias.scope !5, !noalias !7 -; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] -; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; RV64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; RV64-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 +; RV64-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, 
zeroinitializer) +; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] +; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 +; RV64-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV64-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] +; RV64-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 +; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: ; RV64-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV64-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -151,22 +159,22 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64: for.body: ; RV64-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] -; RV64-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; RV64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP17]], 100 +; RV64-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; RV64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100 ; RV64-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; RV64: if.then: -; RV64-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 -; RV64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP18]] -; RV64-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 -; RV64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double -; RV64-NEXT: [[ADD:%.*]] = fadd double [[TMP19]], [[CONV]] +; RV64-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; RV64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP22]] +; RV64-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 +; RV64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double +; RV64-NEXT: [[ADD:%.*]] = fadd double [[TMP23]], [[CONV]] ; RV64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] ; RV64-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 ; RV64-NEXT: br label [[FOR_INC]] ; RV64: for.inc: ; RV64-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 ; RV64-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 -; RV64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]] +; RV64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]] ; RV64: for.end: ; RV64-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index 5fc46c203167f..7f4eb387a1ece 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: foo ; CHECK: LV: IC is 2 -; CHECK: %{{.*}} = add <4 x i32> %{{.*}}, -; CHECK: %{{.*}} = add {{.*}}, 8 +; 
CHECK: %{{.*}} = add <8 x i32> %{{.*}},
+; CHECK: %{{.*}} = add {{.*}}, 16
; Function Attrs: nofree norecurse nosync nounwind writeonly
define dso_local void @foo(i32 signext %n, ptr nocapture %A) local_unnamed_addr #0 {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll
index 4e9ec86df7ca0..e8d5dc6211b70 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll
@@ -14,10 +14,10 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r
; LMUL1-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1
; LMUL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; LMUL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; LMUL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; LMUL1: vector.ph:
-; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]]
; LMUL1: vector.body:
@@ -25,15 +25,15 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r
; LMUL1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]]
; LMUL1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
; LMUL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
; LMUL1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
-; LMUL1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
-; LMUL1-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; LMUL1-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
+; LMUL1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; LMUL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]]
; LMUL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
-; LMUL1-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
-; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; LMUL1-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP10]], align 4
+; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; LMUL1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; LMUL1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; LMUL1: middle.block:
@@ -54,7 +54,7 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r
; LMUL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; LMUL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; LMUL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]]
-; LMUL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; LMUL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; LMUL1: for.end.loopexit:
; LMUL1-NEXT: br label [[FOR_END]]
; LMUL1: for.end:
@@ -108,7 +108,7 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r
; LMUL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; LMUL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; LMUL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]]
-; LMUL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; LMUL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; LMUL2: for.end.loopexit:
; LMUL2-NEXT: br label [[FOR_END]]
; LMUL2: for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
index bc4e60425ac4c..a266ae643c1af 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
@@ -11,27 +11,30 @@ define void @test(ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 200
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
-; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 200
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]],
label [[SCALAR_PH]] @@ -47,7 +50,7 @@ define void @test(ptr %p) { ; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -82,12 +85,12 @@ define void @test_may_clobber(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 32 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 100 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -132,27 +135,30 @@ define void @trivial_due_max_vscale(ptr %p) { ; CHECK-LABEL: @trivial_due_max_vscale( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 8192 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 +; CHECK-NEXT: 
[[TMP7:%.*]] = add i64 [[TMP4]], 8192 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -195,27 +201,30 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; CHECK-LABEL: @no_high_lmul_or_interleave( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1024 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 1024 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label 
[[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index 53e00ad0fee2b..f7bc4bd35f377 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -16,27 +16,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-LABEL: @vector_add( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -58,27 +61,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-LABEL: @vector_add( ; 
VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLEN128-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -121,27 +127,27 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLENUNK-LABEL: @vector_add_i32( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: ; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: 
[[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] ; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 ; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -166,27 +172,27 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLEN128-LABEL: @vector_add_i32( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: ; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] ; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 +; 
VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 ; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -287,27 +293,30 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLENUNK-LABEL: @indexed_store( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; 
VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -329,27 +338,30 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLEN128-LABEL: @indexed_store( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLEN128-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLEN128-NEXT: [[TMP10:%.*]] = 
icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -389,34 +401,37 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-LABEL: @indexed_load( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; VLENUNK-NEXT: [[TMP6]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; VLENUNK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; VLENUNK-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLENUNK: middle.block: 
-; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP6]]) +; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLENUNK: scalar.ph: ; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] ; VLENUNK: for.body: ; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -430,40 +445,43 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VLENUNK: for.end: -; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; ; VLEN128-LABEL: @indexed_load( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; VLEN128-NEXT: [[TMP6]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; VLEN128-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLEN128: middle.block: -; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP6]]) +; VLEN128-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLEN128: scalar.ph: ; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLEN128-NEXT: br label [[FOR_BODY:%.*]] ; VLEN128: for.body: ; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -477,7 +495,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VLEN128: for.end: -; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: @@ -503,25 +521,28 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-LABEL: @splat_int( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector 
[[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -541,25 +562,28 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-LABEL: @splat_int( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; 
VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -595,25 +619,28 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLENUNK-LABEL: @splat_ptr( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -633,25 +660,28 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLEN128-LABEL: @splat_ptr( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll index 8ed7b6444ec6e..c553977a83626 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll @@ -227,17 +227,17 @@ for.end: } ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2) define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_fast_bfloat ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <8 x bfloat> -; CHECK: %[[LOAD2:.*]] = load <8 x bfloat> -; CHECK: %[[FADD1:.*]] = fadd fast <8 x bfloat> %[[LOAD1]] -; CHECK: %[[FADD2:.*]] = fadd fast <8 x bfloat> %[[LOAD2]] +; CHECK: %[[LOAD1:.*]] = load <16 x bfloat> +; CHECK: %[[LOAD2:.*]] = load <16 x bfloat> +; CHECK: %[[FADD1:.*]] = fadd fast <16 x bfloat> %[[LOAD1]] +; CHECK: %[[FADD2:.*]] = fadd fast <16 x bfloat> %[[LOAD2]] ; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = fadd fast <8 x bfloat> %[[FADD2]], %[[FADD1]] -; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR8000, <8 x bfloat> %[[RDX]]) +; CHECK: %[[RDX:.*]] = fadd fast <16 x bfloat> %[[FADD2]], %[[FADD1]] +; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> %[[RDX]]) entry: br label %for.body @@ -328,17 +328,17 @@ for.end: ; MUL ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. -; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mul ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <4 x i32> -; CHECK: %[[LOAD2:.*]] = load <4 x i32> -; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD1]] -; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD2]] +; CHECK: %[[LOAD1:.*]] = load <8 x i32> +; CHECK: %[[LOAD2:.*]] = load <8 x i32> +; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD1]] +; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD2]] ; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]] -; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]]) +; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]] +; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]]) entry: br label %for.body @@ -358,21 +358,21 @@ for.end: ; preds = %for.body, %entry ; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @memory_dependence ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <4 x i32> -; CHECK: %[[LOAD2:.*]] = load <4 x i32> -; CHECK: %[[LOAD3:.*]] = load <4 x i32> -; CHECK: %[[LOAD4:.*]] = load <4 x i32> -; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]] -; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]] -; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]] -; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]] +; CHECK: %[[LOAD1:.*]] = load <8 x i32> +; CHECK: %[[LOAD2:.*]] = load <8 x i32> +; CHECK: %[[LOAD3:.*]] = load <8 x i32> +; CHECK: %[[LOAD4:.*]] = load <8 x i32> +; CHECK: %[[ADD1:.*]] = add nsw <8 x i32> %[[LOAD3]], %[[LOAD1]] +; CHECK: %[[ADD2:.*]] = add nsw <8 x i32> %[[LOAD4]], %[[LOAD2]] +; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD3]] +; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD4]] ; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]] -; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]]) +; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]] +; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]]) entry: br label %for.body @@ -396,19 +396,19 @@ for.end: ret i32 %mul } -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 2, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2) define float @fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-LABEL: @fmuladd( ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load -; CHECK: [[WIDE_LOAD2:%.*]] = load -; CHECK: [[WIDE_LOAD3:%.*]] = load -; CHECK: [[WIDE_LOAD4:%.*]] = load -; CHECK: [[MULADD1:%.*]] = call reassoc @llvm.fmuladd.nxv2f32( [[WIDE_LOAD]], [[WIDE_LOAD3]], -; CHECK: [[MULADD2:%.*]] = call reassoc @llvm.fmuladd.nxv2f32( [[WIDE_LOAD2]], [[WIDE_LOAD4]], +; CHECK: [[WIDE_LOAD:%.*]] = load +; CHECK: [[WIDE_LOAD2:%.*]] = load +; CHECK: [[WIDE_LOAD3:%.*]] = load +; CHECK: [[WIDE_LOAD4:%.*]] = load +; CHECK: [[MULADD1:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[WIDE_LOAD]], [[WIDE_LOAD3]], +; CHECK: [[MULADD2:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[WIDE_LOAD2]], [[WIDE_LOAD4]], ; CHECK: middle.block: -; CHECK: [[BIN_RDX:%.*]] = fadd reassoc [[MULADD2]], [[MULADD1]] -; CHECK: call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float -0.000000e+00, [[BIN_RDX]]) +; CHECK: [[BIN_RDX:%.*]] = fadd reassoc [[MULADD2]], [[MULADD1]] +; CHECK: call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[BIN_RDX]]) ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 6d057f378d199..5e231da7e7b57 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -12,27 +12,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP6:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[TMP6]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -75,27 +78,30 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; 
CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -136,34 +142,37 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: 
[[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP7]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP8]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 
[[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -177,7 +186,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: for.end:
-; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
 ;
 entry:
@@ -205,25 +214,28 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
+;
CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -260,26 +272,29 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index 0d6ef7c00def8..4c994772643ef 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -57,33 +57,22 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP5]], i32 4) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0(ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0( [[TMP8]], ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 4, 4 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 7b6e29388c759..0bdcf5b1efd01 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ 
b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -13,26 +13,29 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-LABEL: @uniform_load( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -58,17 +61,17 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8 -; 
FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXEDLEN: middle.block: @@ -93,26 +96,29 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 
[[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -138,12 +144,12 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -183,26 +189,29 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-LABEL: @uniform_load_outside_use( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 
@llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -218,7 +227,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: for.end: -; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; SCALABLE-NEXT: ret i64 [[V_LCSSA]] ; ; FIXEDLEN-LABEL: @uniform_load_outside_use( @@ -229,17 +238,17 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = 
insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXEDLEN: middle.block: @@ -284,12 +293,12 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -331,39 +340,43 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-LABEL: @conditional_uniform_load( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: 
[[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], i32 8, [[TMP8]], poison) -; SCALABLE-NEXT: [[TMP9:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[WIDE_MASKED_GATHER]], zeroinitializer -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 8 -; SCALABLE-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ 
[[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP11]], poison) +; SCALABLE-NEXT: [[TMP12:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP11]], [[WIDE_MASKED_GATHER]], zeroinitializer +; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 +; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP14]], align 8 +; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -391,33 +404,33 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT2]], <4 x ptr> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT]], i32 8, 
<2 x i1> [[TMP2]], <2 x i64> poison) -; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> [[TMP3]], <2 x i64> poison) -; FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], -; FIXEDLEN-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], -; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[WIDE_MASKED_GATHER]], <2 x i64> zeroinitializer -; FIXEDLEN-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP3]], <2 x i64> [[WIDE_MASKED_GATHER4]], <2 x i64> zeroinitializer +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) +; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT3]], i32 8, <4 x i1> [[TMP3]], <4 x i64> poison) +; FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], +; FIXEDLEN-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer +; FIXEDLEN-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[WIDE_MASKED_GATHER4]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 -; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI5]], ptr [[TMP9]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 +; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI5]], ptr [[TMP9]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], ; FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: middle.block: @@ -448,42 +461,46 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], 
[[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], i32 8, [[TMP10]], poison) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer -; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[WIDE_MASKED_GATHER]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = or [[TMP10]], [[TMP12]] -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[PREDPHI]], ptr [[TMP15]], i32 8, [[TMP14]]) -; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp 
eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer +; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP13]], poison) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = xor [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer +; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[WIDE_MASKED_GATHER]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or [[TMP13]], [[TMP15]] +; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP18]], i32 8, [[TMP17]]) +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -510,22 +527,22 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> [[TMP1]], <2 x i64> poison) -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = xor 
<2 x i1> [[TMP1]], -; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[WIDE_MASKED_GATHER]], <2 x i64> zeroinitializer +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison) +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -578,26 +595,29 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-LABEL: @uniform_load_unaligned( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 1 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 1 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -623,17 +643,17 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 1 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FIXEDLEN: middle.block: @@ -658,26 +678,29 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 
[[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = load i64, ptr [[B:%.*]], align 1 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 1 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -703,12 +726,12 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> 
[[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -748,26 +771,29 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-LABEL: @uniform_store( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: 
middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -789,23 +815,23 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXEDLEN: middle.block: @@ -830,26 +856,29 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; 
TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -870,8 +899,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 
@@ -879,8 +908,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -920,35 +949,40 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-LABEL: @uniform_store_of_loop_varying( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP3:%.*]] = add zeroinitializer, [[TMP2]] -; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP5:%.*]] = add [[DOTSPLAT]], [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; SCALABLE-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; SCALABLE-NEXT: [[TMP9:%.*]] = extractelement [[TMP5]], i32 [[TMP8]] -; SCALABLE-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] -; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP11]], align 8 -; SCALABLE-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] -; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP5:%.*]] = add zeroinitializer, [[TMP4]] +; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT]], [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; SCALABLE-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 2 +; SCALABLE-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1 +; SCALABLE-NEXT: [[TMP13:%.*]] = extractelement [[TMP7]], i32 [[TMP12]] +; SCALABLE-NEXT: store i64 [[TMP13]], ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP15]], align 8 +; SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -970,33 +1004,31 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT6]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label 
[[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VEC_IND]], <2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> ) -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[STEP_ADD]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> ) -; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT7]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B:%.*]], align 8 +; FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; FIXEDLEN-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP10]], align 8 +; FIXEDLEN-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP11]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXEDLEN-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; FIXEDLEN: middle.block: ; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1019,38 +1051,42 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; 
TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024) -; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add 
[[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1071,23 +1107,22 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VEC_IND]], <2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> ) -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B:%.*]], align 8 +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: ; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1125,39 +1160,43 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-LABEL: @conditional_uniform_store( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector 
[[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP8]]) -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP10]], align 8 -; SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) +; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8 +; SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1184,33 +1223,33 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; FIXEDLEN-NEXT: 
[[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT6]], <2 x ptr> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT2]], <4 x ptr> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT4]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT6]], <4 x ptr> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> [[TMP2]]) -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT7]], i32 8, <2 x i1> [[TMP3]]) +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT3]], i32 8, <4 x i1> [[TMP2]]) +; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT5]], <4 x ptr> [[BROADCAST_SPLAT7]], i32 8, <4 x i1> [[TMP3]]) ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2 -; FIXEDLEN-NEXT: 
store <2 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], ; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: middle.block: @@ -1240,43 +1279,47 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP10]]) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = or [[TMP10]], [[TMP13]] -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP15]], i32 8, [[TMP14]]) -; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP13]]) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = xor [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or [[TMP13]], [[TMP16]] +; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, [[TMP17]]) +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul 
i64 [[TMP19]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1302,22 +1345,22 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT1]], <4 x ptr> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <2 x i1> [[TMP1]]) +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]]) ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -1368,26 +1411,29 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; 
SCALABLE-LABEL: @uniform_store_unaligned( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1409,23 +1455,23 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x 
i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; FIXEDLEN: middle.block: @@ -1450,26 +1496,29 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add 
i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1490,8 +1539,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1499,8 +1548,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; TF-FIXEDLEN: middle.block: 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll index 0e2f916ac08b6..fb2167b7f5c33 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll @@ -12,33 +12,23 @@ define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i16> poison, i16 [[V]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT4]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <2 x i64> [[VEC_IND]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], <2 x i64> [[STEP_ADD]] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_VEC]], <4 x i16> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x i16> [[WIDE_VEC2]], <4 x i16> poison, <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i16> [[STRIDED_VEC3]], [[BROADCAST_SPLAT5]] -; CHECK-NEXT: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP6]], <2 x ptr> [[TMP0]], i32 2, <2 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP7]], <2 x ptr> [[TMP1]], i32 2, <2 x i1> ) +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[TMP3]], <4 x ptr> [[TMP0]], i32 2, <4 x i1> ) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x 
i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -56,6 +46,7 @@ define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) { ; CHECK: for.end: ; CHECK-NEXT: ret void ; + entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVersioning/lcssa.ll b/llvm/test/Transforms/LoopVersioning/lcssa.ll index ee14f693abd7a..4b51c21257243 100644 --- a/llvm/test/Transforms/LoopVersioning/lcssa.ll +++ b/llvm/test/Transforms/LoopVersioning/lcssa.ll @@ -56,7 +56,6 @@ define void @fill_no_null_opt(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) #0 { ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP]], [[SCEVGEP2]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[LS2_21_PROMOTED]], [[SCEVGEP1]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, i8* [[LS1_20_PROMOTED]], i64 -1 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %bb1.ph.lver.orig, label %bb1.ph ; CHECK: bb1.ph.lver.orig: ; diff --git a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll new file mode 100644 index 0000000000000..3481d53b626fc --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=mergefunc -S < %s | FileCheck %s + +; This test makes sure that the mergefunc pass does not merge functions +; that have different nonnull assertions. 
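+; Specifically (summarizing the checks below): @f1 and @f2 differ only in the
+; !nonnull metadata on the load and both keep their bodies (they are not
+; merged), @noundef_dbg may still be merged into @noundef because only !dbg
+; differs, and @noalias_2 is currently merged into @noalias_1 despite the
+; differing noalias metadata (see the FIXME).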
+ +%1 = type ptr + +define void @f1(ptr %0, ptr %1) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !nonnull !0 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !nonnull !0 + store ptr %3, ptr %0, align 8 + ret void +} + +define void @f2(ptr %0, ptr %1) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8 + store ptr %3, ptr %0, align 8 + ret void +} + +define void @noundef(ptr %0, ptr %1) { +; CHECK-LABEL: @noundef( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !noundef !0 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !noundef !0 + store ptr %3, ptr %0, align 8 + ret void +} + +define void @noalias_1(ptr %0, ptr %1) { +; CHECK-LABEL: @noalias_1( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !noalias !1 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8, !alias.scope !1 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !noalias !4 + store ptr %3, ptr %0, align 8, !alias.scope !4 + ret void +} + +define void @noundef_dbg(ptr %0, ptr %1) { +; CHECK-LABEL: @noundef_dbg( +; CHECK-NEXT: tail call void @noundef(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !noundef !0, !dbg !1 + store ptr %3, ptr %0, align 8 + ret void +} + +; FIXME: This is merged despite different noalias metadata. +define void @noalias_2(ptr %0, ptr %1) { +; CHECK-LABEL: @noalias_2( +; CHECK-NEXT: tail call void @noalias_1(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !noalias !7 + store ptr %3, ptr %0, align 8, !alias.scope !7 + ret void +} + +!0 = !{} +!1 = !{} +!2 = !{!2} +!3 = !{!3, !2} +!4 = !{!3} +!5 = !{!5} +!6 = !{!6, !5} +!7 = !{!6} diff --git a/llvm/test/Transforms/NewGVN/range.ll b/llvm/test/Transforms/NewGVN/range.ll index 8803737c7bd4a..c853693897ab3 100644 --- a/llvm/test/Transforms/NewGVN/range.ll +++ b/llvm/test/Transforms/NewGVN/range.ll @@ -17,7 +17,7 @@ define i32 @test1(ptr %p) { define i32 @test2(ptr %p) { ; CHECK-LABEL: define i32 @test2 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -30,7 +30,7 @@ define i32 @test2(ptr %p) { define i32 @test3(ptr %p) { ; CHECK-LABEL: define i32 @test3 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -43,7 +43,7 @@ define i32 @test3(ptr %p) { define i32 @test4(ptr %p) { ; CHECK-LABEL: define i32 @test4 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -56,7 +56,7 @@ define i32 @test4(ptr %p) { define i32 @test5(ptr %p) { ; CHECK-LABEL: define i32 @test5 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range 
[[RNG1:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -69,7 +69,7 @@ define i32 @test5(ptr %p) { define i32 @test6(ptr %p) { ; CHECK-LABEL: define i32 @test6 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -82,7 +82,7 @@ define i32 @test6(ptr %p) { define i32 @test7(ptr %p) { ; CHECK-LABEL: define i32 @test7 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG5:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -95,7 +95,7 @@ define i32 @test7(ptr %p) { define i32 @test8(ptr %p) { ; CHECK-LABEL: define i32 @test8 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -119,8 +119,9 @@ define i32 @test8(ptr %p) { !10 = !{i32 5, i32 1} ;. ; CHECK: [[RNG0]] = !{i32 0, i32 2} -; CHECK: [[RNG1]] = !{i32 -5, i32 -2} -; CHECK: [[RNG2]] = !{i32 10, i32 1} -; CHECK: [[RNG3]] = !{i32 1, i32 2, i32 3, i32 4} -; CHECK: [[RNG4]] = !{i32 1, i32 5} +; CHECK: [[RNG1]] = !{i32 0, i32 2, i32 3, i32 5} +; CHECK: [[RNG2]] = !{i32 0, i32 5} +; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5} +; CHECK: [[RNG4]] = !{i32 10, i32 1} +; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2} ;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll new file mode 100644 index 0000000000000..2fe8f39e423a5 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -O3 -S < %s | FileCheck %s + +; Arg promotion eliminates the struct argument, and eliminates dead arguments, but introduces and leaves dead loads of the eliminated dead arg in callers + +%struct.ss = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr } + +define internal void @phantomLoad(ptr %p, ptr %y, ptr %x) { +entry: + %0 = load i32, ptr %x + store i32 %0, ptr %y + ret void +} + +define ptr @parent(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) align 2 { +; CHECK-LABEL: define nonnull ptr @parent +; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], i16 [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[F]], i64 64 +; CHECK-NEXT: [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8 +; CHECK-NEXT: [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0 +; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL1]], i16 [[VAL2]] +; CHECK-NEXT: [[SPEC_SELECT2_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL2]], i16 [[VAL1]] +; CHECK-NEXT: store i16 [[SPEC_SELECT_I]], ptr [[F_VAL]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16 +; CHECK-NEXT: store i16 [[SPEC_SELECT2_I]], ptr [[TMP1]], align 2 +; CHECK-NEXT: ret ptr [[F]] +; +entry: + call void @badChild(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) #4 + ret ptr %f +} + +define internal void @badChild(ptr align 8 dereferenceable(72) %this, i16 %val1, i16 %val2, i32 %val3) align 2 { +entry: + %othergep = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 2 + %load0 = load ptr, ptr %othergep, align 8 + %load2 = load ptr, ptr %this + %x = alloca i32 + %y = alloca i32 + call void @phantomLoad(ptr %load0, ptr %x, ptr %y) + call void @phantomLoad(ptr %load2, ptr %x, ptr %y) + %cmp.not.not = icmp eq i32 %val3, 0 + br i1 %cmp.not.not, label %if.then, label %if.else + +if.then: ; preds = %entry + %0 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8 + %1 = load ptr, ptr %0, align 8 + store i16 %val1, ptr %1, align 2 + %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %1, i64 16 + store i16 %val2, ptr %add.ptr.i.i.i.i, align 2 + br label %if.end + +if.else: ; preds = %entry + %2 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8 + %3 = load ptr, ptr %2, align 8 + %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %3, i64 16 + store i16 %val1, ptr %add.ptr.i.i.i.i7, align 2 + store i16 %val2, ptr %3, align 2 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll new file mode 100644 index 0000000000000..92b0f83c84b9e --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll @@ -0,0 +1,386 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \ +; RUN: -riscv-v-vector-bits-min=-1 
-riscv-v-slp-max-vf=0 -S | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v -S | FileCheck %s --check-prefixes=DEFAULT + +define void @vec_add(ptr %dest, ptr %p) { +; CHECK-LABEL: @vec_add( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_add( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = add i16 [[E0]], 1 +; DEFAULT-NEXT: [[A1:%.*]] = add i16 [[E1]], 1 +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %a0 = add i16 %e0, 1 + %a1 = add i16 %e1, 1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_sub(ptr %dest, ptr %p) { +; CHECK-LABEL: @vec_sub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], +; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_sub( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = sub i16 [[E0]], 17 +; DEFAULT-NEXT: [[A1:%.*]] = sub i16 [[E1]], 17 +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %a0 = sub i16 %e0, 17 + %a1 = sub i16 %e1, 17 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_rsub(ptr %dest, ptr %p) { +; CHECK-LABEL: @vec_rsub( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i16> , [[TMP0]] +; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_rsub( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = sub i16 29, [[E0]] +; DEFAULT-NEXT: [[A1:%.*]] = sub i16 29, [[E1]] +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds 
i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %a0 = sub i16 29, %e0 + %a1 = sub i16 29, %e1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_mul(ptr %dest, ptr %p) { +; CHECK-LABEL: @vec_mul( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], +; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_mul( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = mul i16 [[E0]], 7 +; DEFAULT-NEXT: [[A1:%.*]] = mul i16 [[E1]], 7 +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %a0 = mul i16 %e0, 7 + %a1 = mul i16 %e1, 7 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_sdiv(ptr %dest, ptr %p) { +; CHECK-LABEL: @vec_sdiv( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], +; CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_sdiv( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = sdiv i16 [[E0]], 7 +; DEFAULT-NEXT: [[A1:%.*]] = sdiv i16 [[E1]], 7 +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %a0 = sdiv i16 %e0, 7 + %a1 = sdiv i16 %e1, 7 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_and(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i16> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_and( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = and i16 
[[E0]], [[F0]] +; DEFAULT-NEXT: [[A1:%.*]] = and i16 [[E1]], [[F1]] +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = and i16 %e0, %f0 + %a1 = and i16 %e1, %f1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_or(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i16> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_or( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = or i16 [[E0]], [[F0]] +; DEFAULT-NEXT: [[A1:%.*]] = or i16 [[E1]], [[F1]] +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = or i16 %e0, %f0 + %a1 = or i16 %e1, %f1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_sll(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_sll( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i16> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_sll( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = shl i16 [[E0]], [[F0]] +; DEFAULT-NEXT: [[A1:%.*]] = shl i16 [[E1]], [[F1]] +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + 
%e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = shl i16 %e0, %f0 + %a1 = shl i16 %e1, %f1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +declare i16 @llvm.smin.i16(i16, i16) +define void @vec_smin(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_smin( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_smin( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = tail call i16 @llvm.smin.i16(i16 [[E0]], i16 [[F0]]) +; DEFAULT-NEXT: [[A1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[E1]], i16 [[F1]]) +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = tail call i16 @llvm.smin.i16(i16 %e0, i16 %f0) + %a1 = tail call i16 @llvm.smin.i16(i16 %e1, i16 %f1) + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +declare i16 @llvm.umax.i16(i16, i16) +define void @vec_umax(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_umax( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_umax( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = tail call i16 @llvm.umax.i16(i16 [[E0]], i16 [[F0]]) +; DEFAULT-NEXT: [[A1:%.*]] = tail call i16 @llvm.umax.i16(i16 [[E1]], i16 [[F1]]) +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 
2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = tail call i16 @llvm.umax.i16(i16 %e0, i16 %f0) + %a1 = tail call i16 @llvm.umax.i16(i16 %e1, i16 %f1) + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll index cb017795077f1..08bac7f788c77 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll @@ -10,26 +10,10 @@ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64" define void @foo(ptr nocapture writeonly %da) { -; CHECK-128-LABEL: @foo( -; CHECK-128-NEXT: entry: -; CHECK-128-NEXT: store i64 0, ptr [[DA:%.*]], align 8 -; CHECK-128-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 1 -; CHECK-128-NEXT: store i64 0, ptr [[ARRAYIDX1]], align 8 -; CHECK-128-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 2 -; CHECK-128-NEXT: store i64 0, ptr [[ARRAYIDX2]], align 8 -; CHECK-128-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 3 -; CHECK-128-NEXT: store i64 0, ptr [[ARRAYIDX3]], align 8 -; CHECK-128-NEXT: ret void -; -; CHECK-256-LABEL: @foo( -; CHECK-256-NEXT: entry: -; CHECK-256-NEXT: store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8 -; CHECK-256-NEXT: ret void -; -; CHECK-512-LABEL: @foo( -; CHECK-512-NEXT: entry: -; CHECK-512-NEXT: store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8 -; CHECK-512-NEXT: ret void +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8 +; CHECK-NEXT: ret void ; entry: store i64 0, ptr %da, align 8 @@ -58,3 +42,7 @@ entry: %arrayidx2 = getelementptr inbounds i8, ptr %da, i8 2 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-128: {{.*}} +; CHECK-256: {{.*}} +; CHECK-512: {{.*}} diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll index 4a6e3634a5d16..0b1023eee2732 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -233,6 +233,54 @@ define i32 @fn3_rv(ptr %obj) #0 { ret i32 %result } +; CHECK-LABEL: define i32 @fn4 +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr @vt1_1 + ; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_cpy +; CHECK-NOT: call void (...) 
@llvm.icall.branch.funnel +define i32 @fn4_cpy(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr @vt1_1 + ; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_rv +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4_rv(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_rv_cpy +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4_rv_cpy(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + ; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) ; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2, ptr {{(nonnull )?}}@vf1_2, ...) diff --git a/llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s b/llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s new file mode 100644 index 0000000000000..f59ce7aa9f774 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s @@ -0,0 +1,152 @@ +// RUN: llvm-mc %s -defsym ALIGN_4=1 -save-temp-labels -filetype obj -triple arm-none-eabi -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s --check-prefix=L4 +// RUN: llvm-dwarfdump -debug-line %t.o 2>&1 | FileCheck %s --implicit-check-not='warning:' --check-prefix=MULT4 + +// RUN: llvm-mc %s -defsym ALIGN_8=1 -save-temp-labels -filetype obj -triple arm-none-eabi -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s --check-prefix=L8 +// RUN: llvm-dwarfdump -debug-line %t.o 2>&1 | FileCheck %s --implicit-check-not='warning:' --check-prefix=MULT8 + +// RUN: llvm-mc %s -defsym UNALIGNED_PADDING=1 -save-temp-labels -filetype obj -triple arm-none-eabi -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s --check-prefix=LUNALIGN +// RUN: llvm-dwarfdump -debug-line %t.o 2>&1 | FileCheck %s --check-prefix=UNALIGN + +/// This test is based on a real example from ARM C/C++ Compiler. +/// It verifies llvm-dwarfdump is able to dump line tables even if they've been +/// placed at aligned offsets. 
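+/// Three configurations are exercised below: with ALIGN_4 and ALIGN_8 the
+/// second line table starts at an aligned offset (0x2c and 0x28 respectively,
+/// see the L4/L8 symbol checks) and both tables are dumped, while with
+/// UNALIGNED_PADDING the extra padding byte leaves the second table
+/// unaligned, so dumping it fails with an "unsupported version" warning at
+/// offset 0x27.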
+ +// L4: 0000002b N .Ltable0_end +// MULT4: Address Line Column File ISA Discriminator Flags +// MULT4-NEXT: ------------------ ------ ------ ------ --- ------------- ------------- +// MULT4-NEXT: 0x0000000000000000 1 0 1 0 0 is_stmt end_sequence +// MULT4-EMPTY: +// MULT4-NEXT: debug_line[0x0000002c] +// MULT4-NEXT: Line table prologue: +// MULT4-NEXT: total_length: 0x0000003a{{$}} +// MULT4-NEXT: format: DWARF32 +// MULT4-NEXT: version: 2{{$}} +// MULT4-NEXT: prologue_length: 0x0000001a +// MULT4-NEXT: min_inst_length: 2 +// MULT4-NEXT: default_is_stmt: 1 + +// L8: 00000027 N .Ltable0_end +// MULT8: Address Line Column File ISA Discriminator Flags +// MULT8-NEXT: ------------------ ------ ------ ------ --- ------------- ------------- +// MULT8-NEXT: 0x0000000000000000 1 0 1 0 0 is_stmt end_sequence +// MULT8-EMPTY: +// MULT8-NEXT: debug_line[0x00000028] +// MULT8-NEXT: Line table prologue: +// MULT8-NEXT: total_length: 0x0000003a{{$}} +// MULT8-NEXT: format: DWARF32 +// MULT8-NEXT: version: 2{{$}} +// MULT8-NEXT: prologue_length: 0x0000001a +// MULT8-NEXT: min_inst_length: 2 +// MULT8-NEXT: default_is_stmt: 1 + +/// This should fail to dump: +// LUNALIGN: 00000027 N .Ltable0_end +// UNALIGN: warning: parsing line table prologue at offset 0x00000027: unsupported version + +.section .debug_line +/// First line table +/// Unit total length: +.long .Ltable0_end - .Ltable0_start +.Ltable0_start: +.short 2 /// Version +/// Header length: +.long .Ltable0_header_end - .Ltable0_header_start +.Ltable0_header_start: +.byte 4 /// Min instruction length +.byte 1 /// Max operations per instruction +.byte 0 /// Default is statement +.byte 6 /// Line range +.byte 10 /// Opcode base +.byte 0 /// standard_opcode_lengths[DW_LNS_copy] = 0 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_pc] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_line] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_file] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_column] = 1 +.byte 0 /// standard_opcode_lengths[DW_LNS_negate_stmt] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_set_basic_block] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_const_add_pc] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 0 +.byte 0 /// No include directories +/// File name: +.ifdef ALIGN_4 +/// Pad out filename so next 4 byte aligned offset is a multiple of 4 and not 8. 
+.asciz "foobar.cpp" +.else +.asciz "test.c" +.endif +.byte 0 /// Dir idx +.byte 0 /// Mod time +.byte 0 /// Length +.byte 0 /// End files +.Ltable0_header_end: +/// Line table operations +.byte 0 /// Extended opcode +.byte 1 /// Length 1 +.byte 1 /// DW_LNE_end_sequence +.Ltable0_end: +/// End first line table +/// Padding: +.ifdef UNALIGNED_PADDING +.short 0 +.else +.byte 0 +.endif +/// Second line table +/// Unit total length: +.long .Ltable1_end - .Ltable1_start +.Ltable1_start: +.short 2 /// Version +/// Header length: +.long .Ltable1_header_end - .Ltable1_header_start +.Ltable1_header_start: +.byte 2 /// Min instruction length +.byte 1 /// Max operations per instruction +.byte 0 /// Default is statement +.byte 6 /// Line range +.byte 10 /// Opcode base +.byte 0 /// standard_opcode_lengths[DW_LNS_copy] = 0 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_pc] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_line] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_file] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_column] = 1 +.byte 0 /// standard_opcode_lengths[DW_LNS_negate_stmt] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_set_basic_block] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_const_add_pc] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 0 +.byte 0 /// No include directories +.asciz "test.c" /// File name +.byte 0 /// Dir idx +.byte 0 /// Mod time +.byte 0 /// Length +.byte 0 /// End files +.Ltable1_header_end: +/// Line table operations +.byte 4 /// DW_LNS_set_file +.byte 1 /// File 1 +.byte 5 /// DW_LNS_set_column +.byte 1 /// Column 1 +.byte 0 /// Extended opcode +.byte 5 /// Length 5 +.byte 2 /// DW_LNE_set_address +.long 32896 /// Address = 0x00008080 +.byte 3 /// DW_LNS_advance_line +.byte 6 /// Line += 6 +.byte 1 /// DW_LNS_copy +.byte 5 /// DW_LNS_set_column +.byte 2 /// Column 2 +.byte 12 /// Special opcode (address += 0, line += 2) +.byte 30 /// Special opcode (address += 6, line += 2) +.byte 5 /// DW_LNS_set_column +.byte 1 /// Column 1 +.byte 17 /// Special opcode (address += 2, line += 1) +.byte 2 /// DW_LNS_advance_pc +.byte 4 /// += (4 * min instruction length) +.byte 0 /// Extended opcode +.byte 1 /// Length 1 +.byte 1 /// DW_LNE_end_sequence +.Ltable1_end: +/// End second line table +.short 0 /// Padding (to make section a word multiple) diff --git a/llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg b/llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg new file mode 100644 index 0000000000000..236e1d3441665 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/llvm-lib/duplicate.test b/llvm/test/tools/llvm-lib/duplicate.test index 098858d4fbcd1..87dae66cb80be 100644 --- a/llvm/test/tools/llvm-lib/duplicate.test +++ b/llvm/test/tools/llvm-lib/duplicate.test @@ -14,3 +14,12 @@ CHECK: bar.o CHECK-NEXT: abc.o CHECK-NEXT: foo.o CHECK-NOT: foo.o + +# Check that symbol map contains sorted, de-duplicated symbols. 
+RUN: cd %t && llvm-lib -out:foo.lib foo.o foo.o abc.o bar.o foo.o foo.o +RUN: llvm-nm --print-armap %t/foo.lib | FileCheck %s --check-prefix=DUP +# DUP: Archive map +# DUP-NEXT: a in abc.o +# DUP-NEXT: b in bar.o +# DUP-NEXT: c in abc.o +# DUP-EMPTY diff --git a/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s b/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s index 7c41b63dcbf86..d15b675450a90 100644 --- a/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s +++ b/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s @@ -31,7 +31,7 @@ vsetvli a3, a2, e8, m8, tu, mu .Lend: #--- invalid_arch.s -# INVALID: string must begin with rv32{i,e,g} or rv64{i,g} +# INVALID: string must begin with rv32{i,e,g} or rv64{i,e,g} nop .section .riscv.attributes,"",@0x70000003 diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index ed65b83487790..860fa39d57e8a 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -496,6 +496,24 @@ static int compileModule(char **argv, LLVMContext &Context) { TargetOptions Options; auto InitializeOptions = [&](const Triple &TheTriple) { Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple); + + if (Options.XCOFFReadOnlyPointers) { + if (!TheTriple.isOSAIX()) + reportError("-mroptr option is only supported on AIX", InputFilename); + + // Since the storage mapping class is specified per csect, + // without using data sections, it is less effective to use read-only + // pointers. Using read-only pointers may cause other RO variables in the + // same csect to become RW when the linker acts upon `-bforceimprw`; + // therefore, we require that separate data sections are used in the + // presence of ReadOnlyPointers. We respect the setting of data-sections + // since we have not found reasons to do otherwise that overcome the user + // surprise of not respecting the setting. 
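+      // For illustration only (not part of this change): an invocation such
+      // as `llc -mtriple=powerpc64-ibm-aix-xcoff -mroptr -data-sections ...`
+      // satisfies both requirements, whereas the same invocation without
+      // -data-sections is rejected by the check below.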
+ if (!Options.DataSections) + reportError("-mroptr option must be used with -data-sections", + InputFilename); + } + Options.BinutilsVersion = TargetMachine::parseBinutilsVersion(BinutilsVersion); Options.DisableIntegratedAS = NoIntegratedAssembler; diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td index de7f883d24a80..c6627c75157b8 100644 --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -145,10 +145,10 @@ def reloc : Flag<["--"], "reloc">, def : Flag<["-"], "r">, Alias, HelpText<"Alias for --reloc">; def print_imm_hex : Flag<["--"], "print-imm-hex">, - HelpText<"Use hex format for immediate values">; + HelpText<"Use hex format for immediate values (default)">; def no_print_imm_hex : Flag<["--"], "no-print-imm-hex">, - HelpText<"Do not use hex format for immediate values (default)">; + HelpText<"Do not use hex format for immediate values">; def : Flag<["--"], "print-imm-hex=false">, Alias; def private_headers : Flag<["--"], "private-headers">, diff --git a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp index eed5be7054e41..2b97e65bbf093 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp @@ -23,7 +23,7 @@ static void dropRegisterHintsFromFunction(Oracle &O, MachineFunction &MF) { for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { Register Reg = Register::index2VirtReg(I); - const std::pair> &Hints = + const std::pair> &Hints = MRI.getRegAllocationHints(Reg); if (Hints.second.empty()) continue; diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 0146c3b4cf6e0..a3cb1b6fd638b 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -92,6 +92,24 @@ TEST(LinkGraphTest, AddressAccess) { EXPECT_EQ(B1.getFixupAddress(E1), B1Addr + 8) << "Incorrect fixup address"; } +TEST(LinkGraphTest, SectionEmpty) { + // Check that Section::empty behaves as expected. + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + getGenericEdgeKindName); + auto &Sec1 = + G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); + auto &B = + G.createContentBlock(Sec1, BlockContent, orc::ExecutorAddr(0x1000), 8, 0); + G.addDefinedSymbol(B, 0, "S", 4, Linkage::Strong, Scope::Default, false, + false); + + auto &Sec2 = + G.createSection("__data.2", orc::MemProt::Read | orc::MemProt::Write); + + EXPECT_FALSE(Sec1.empty()); + EXPECT_TRUE(Sec2.empty()); +} + TEST(LinkGraphTest, BlockAndSymbolIteration) { // Check that we can iterate over blocks within Sections and across sections. 
LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 05a1d7a58b84d..aee8ed26a6fa6 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5730,8 +5730,9 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { } TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { - OffloadEntriesInfoManager InfoManager; - InfoManager.setConfig(OpenMPIRBuilderConfig(true, false, false, false)); + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.setConfig(OpenMPIRBuilderConfig(true, false, false, false)); + OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager; TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo)); diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp index 2f623a94ce2a8..05997d2d2d2c4 100644 --- a/llvm/unittests/Support/RISCVISAInfoTest.cpp +++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp @@ -109,7 +109,7 @@ TEST(ParseArchString, RejectsUpperCase) { TEST(ParseArchString, RejectsInvalidBaseISA) { for (StringRef Input : {"rv32", "rv64", "rv65i"}) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "string must begin with rv32{i,e,g} or rv64{i,g}"); + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } for (StringRef Input : {"rv32j", "rv64k", "rv32_i"}) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), @@ -118,11 +118,9 @@ TEST(ParseArchString, RejectsInvalidBaseISA) { } TEST(ParseArchString, RejectsUnsupportedBaseISA) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64e", true).takeError()), - "standard user-level extension 'e' requires 'rv32'"); for (StringRef Input : {"rv128i", "rv128g"}) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "string must begin with rv32{i,e,g} or rv64{i,g}"); + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } } @@ -141,7 +139,7 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { RISCVISAInfo &InfoRV32E = **MaybeRV32E; RISCVISAInfo::OrderedExtensionMap ExtsRV32E = InfoRV32E.getExtensions(); EXPECT_EQ(ExtsRV32E.size(), 1UL); - EXPECT_TRUE(ExtsRV32E.at("e") == (RISCVExtensionInfo{1, 9})); + EXPECT_TRUE(ExtsRV32E.at("e") == (RISCVExtensionInfo{2, 0})); EXPECT_EQ(InfoRV32E.getXLen(), 32U); EXPECT_EQ(InfoRV32E.getFLen(), 0U); @@ -167,6 +165,15 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { EXPECT_EQ(InfoRV64I.getXLen(), 64U); EXPECT_EQ(InfoRV64I.getFLen(), 0U); + auto MaybeRV64E = RISCVISAInfo::parseArchString("rv64e", true); + ASSERT_THAT_EXPECTED(MaybeRV64E, Succeeded()); + RISCVISAInfo &InfoRV64E = **MaybeRV64E; + RISCVISAInfo::OrderedExtensionMap ExtsRV64E = InfoRV64E.getExtensions(); + EXPECT_EQ(ExtsRV64E.size(), 1UL); + EXPECT_TRUE(ExtsRV64E.at("e") == (RISCVExtensionInfo{2, 0})); + EXPECT_EQ(InfoRV64E.getXLen(), 64U); + EXPECT_EQ(InfoRV64E.getFLen(), 0U); + auto MaybeRV64G = RISCVISAInfo::parseArchString("rv64g", true); ASSERT_THAT_EXPECTED(MaybeRV64G, Succeeded()); RISCVISAInfo &InfoRV64G = **MaybeRV64G; diff --git a/llvm/unittests/Support/ScopedPrinterTest.cpp b/llvm/unittests/Support/ScopedPrinterTest.cpp index f62d310f25d95..9ebcb0b14bd43 100644 --- a/llvm/unittests/Support/ScopedPrinterTest.cpp +++ 
b/llvm/unittests/Support/ScopedPrinterTest.cpp @@ -510,7 +510,16 @@ FirstSecondThirdByteMask [ (0x333) } TEST_F(ScopedPrinterTest, PrintNumber) { - auto PrintFunc = [](ScopedPrinter &W) { + constexpr float MaxFloat = std::numeric_limits::max(); + constexpr float MinFloat = std::numeric_limits::min(); + constexpr float InfFloat = std::numeric_limits::infinity(); + const float NaNFloat = std::nanf("1"); + constexpr double MaxDouble = std::numeric_limits::max(); + constexpr double MinDouble = std::numeric_limits::min(); + constexpr double InfDouble = std::numeric_limits::infinity(); + const double NaNDouble = std::nan("1"); + + auto PrintFunc = [&](ScopedPrinter &W) { uint64_t Unsigned64Max = std::numeric_limits::max(); uint64_t Unsigned64Min = std::numeric_limits::min(); W.printNumber("uint64_t-max", Unsigned64Max); @@ -556,10 +565,6 @@ TEST_F(ScopedPrinterTest, PrintNumber) { W.printNumber("label", "value", 0); - float MaxFloat = std::numeric_limits::max(); - float MinFloat = std::numeric_limits::min(); - float InfFloat = std::numeric_limits::infinity(); - float NaNFloat = std::nanf("1"); W.printNumber("float-max", MaxFloat); W.printNumber("float-min", MinFloat); W.printNumber("float-inf", InfFloat); @@ -567,11 +572,7 @@ TEST_F(ScopedPrinterTest, PrintNumber) { W.printNumber("float-42.0", 42.0f); W.printNumber("float-42.5625", 42.5625f); - double MaxDouble = std::numeric_limits::max(); - double MinDouble = std::numeric_limits::min(); - double InfDouble = std::numeric_limits::infinity(); - double NaNDouble = std::nan("1"); - W.printNumber("double-max", MaxDouble); + W.printNumber("double-max", MaxDouble); W.printNumber("double-min", MinDouble); W.printNumber("double-inf", InfDouble); W.printNumber("double-nan", NaNDouble); @@ -583,29 +584,30 @@ TEST_F(ScopedPrinterTest, PrintNumber) { // implementation defined behavior. So format the max float/double, instead of // hard coding it in the tests. Note: we can't just use std::to_string(), // since we format the float in PrintNumber(). This isn't required for JSON - // formatting, since it uses exponents, which will be consistent. + // formatting, since it uses exponents, which will be consistent. However, + // NaN and INF may be printed differently, (like AIX), so we still need to + // handle those cases for JSON checking. // Allocate a buffer large enough to represent large floating point values // and construct the string representation for them there. 
char Buf[512]; - format("%5.1f", std::numeric_limits::max()).snprint(Buf, sizeof(Buf)); + format("%5.1f", MaxFloat).snprint(Buf, sizeof(Buf)); std::string MaxFloatStr(Buf); - format("%5.1f", std::numeric_limits::max()).snprint(Buf, sizeof(Buf)); + format("%5.1f", MaxDouble).snprint(Buf, sizeof(Buf)); std::string MaxDoubleStr(Buf); - format("%5.1f", std::numeric_limits::infinity()) - .snprint(Buf, sizeof(Buf)); + format("%5.1f", InfFloat).snprint(Buf, sizeof(Buf)); std::string InfFloatStr(Buf); - std::to_string(std::numeric_limits::infinity()); + format("%5.1f", InfDouble).snprint(Buf, sizeof(Buf)); std::string InfDoubleStr(Buf); - format("%5.1f", std::nanf("1")).snprint(Buf, sizeof(Buf)); + format("%5.1f", NaNFloat).snprint(Buf, sizeof(Buf)); std::string NaNFloatStr(Buf); - format("%5.1f", std::nan("1")).snprint(Buf, sizeof(Buf)); + format("%5.1f", NaNDouble).snprint(Buf, sizeof(Buf)); std::string NaNDoubleStr(Buf); std::string ExpectedOut = Twine( @@ -643,7 +645,7 @@ double-42.5625: 42.6 )") .str(); - const char *JSONExpectedOut = R"({ + std::string JSONExpectedOut = Twine(R"({ "uint64_t-max": 18446744073709551615, "uint64_t-min": 0, "uint32_t-max": 4294967295, @@ -667,17 +669,17 @@ double-42.5625: 42.6 }, "float-max": 3.4028234663852886e+38, "float-min": 1.1754943508222875e-38, - "float-inf": inf, - "float-nan": nan, + "float-inf": )" + std::to_string(InfFloat) + R"(, + "float-nan": )" + std::to_string(NaNFloat) + R"(, "float-42.0": 42, "float-42.5625": 42.5625, "double-max": 1.7976931348623157e+308, "double-min": 2.2250738585072014e-308, - "double-inf": inf, - "double-nan": nan, + "double-inf": )" + std::to_string(InfDouble) + R"(, + "double-nan": )" + std::to_string(NaNDouble) + R"(, "double-42.0": 42, "double-42.5625": 42.5625 -})"; +})").str(); verifyAll(ExpectedOut, JSONExpectedOut, PrintFunc); } diff --git a/llvm/unittests/TextAPI/TextStubV5Tests.cpp b/llvm/unittests/TextAPI/TextStubV5Tests.cpp index 3deb38a5a0a3d..b4e8f513daee2 100644 --- a/llvm/unittests/TextAPI/TextStubV5Tests.cpp +++ b/llvm/unittests/TextAPI/TextStubV5Tests.cpp @@ -944,6 +944,50 @@ TEST(TBDv5, Target_Simulator) { EXPECT_EQ(*File, *WriteResultFile); } +TEST(TBDv5, Target_UnsupportedMinOS) { + static const char TBDv5File[] = R"({ +"tapi_tbd_version": 5, +"main_library": { + "target_info": [ + { + "target": "arm64-macos", + "min_deployment": "10.14" + }, + { + "target": "x86_64-macos", + "min_deployment": "10.14" + } + ], + "install_names":[ + { "name":"/S/L/F/Foo.framework/Foo" } + ] +}})"; + + Expected Result = + TextAPIReader::get(MemoryBufferRef(TBDv5File, "Test.tbd")); + EXPECT_TRUE(!!Result); + TBDFile File = std::move(Result.get()); + EXPECT_EQ(FileType::TBD_V5, File->getFileType()); + TargetList ExpectedTargets = { + Target(AK_x86_64, PLATFORM_MACOS, VersionTuple(10, 14)), + Target(AK_arm64, PLATFORM_MACOS, VersionTuple(11, 0)), + }; + TargetList Targets{File->targets().begin(), File->targets().end()}; + llvm::sort(Targets); + EXPECT_EQ(Targets, ExpectedTargets); + + SmallString<4096> Buffer; + raw_svector_ostream OS(Buffer); + Error WriteResult = TextAPIWriter::writeToStream(OS, *File); + EXPECT_TRUE(!WriteResult); + + Expected Output = + TextAPIReader::get(MemoryBufferRef(Buffer, "Output.tbd")); + EXPECT_TRUE(!!Output); + TBDFile WriteResultFile = std::move(Output.get()); + EXPECT_EQ(*File, *WriteResultFile); +} + TEST(TBDv5, MisspelledKey) { static const char TBDv5File[] = R"({ "tapi_tbd_version": 5, diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp 
b/llvm/unittests/Transforms/Utils/LocalTest.cpp index d6b09b35f2caf..443f1f09915fd 100644 --- a/llvm/unittests/Transforms/Utils/LocalTest.cpp +++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -598,7 +598,8 @@ TEST(Local, SimplifyVScaleWithRange) { // Test that simplifyCall won't try to query it's parent function for // vscale_range attributes in order to simplify llvm.vscale -> constant. - EXPECT_EQ(simplifyCall(CI, SimplifyQuery(M.getDataLayout())), nullptr); + EXPECT_EQ(simplifyCall(CI, VScale, {}, SimplifyQuery(M.getDataLayout())), + nullptr); delete CI; } diff --git a/llvm/utils/TableGen/CodeGenHwModes.cpp b/llvm/utils/TableGen/CodeGenHwModes.cpp index 99a97e89e60c5..2171507f4c63f 100644 --- a/llvm/utils/TableGen/CodeGenHwModes.cpp +++ b/llvm/utils/TableGen/CodeGenHwModes.cpp @@ -65,23 +65,16 @@ void HwModeSelect::dump() const { } CodeGenHwModes::CodeGenHwModes(RecordKeeper &RK) : Records(RK) { - std::vector MRs = Records.getAllDerivedDefinitions("HwMode"); - // The default mode needs a definition in the .td sources for TableGen - // to accept references to it. We need to ignore the definition here. - for (auto I = MRs.begin(), E = MRs.end(); I != E; ++I) { - if ((*I)->getName() != DefaultModeName) + for (Record *R : Records.getAllDerivedDefinitions("HwMode")) { + // The default mode needs a definition in the .td sources for TableGen + // to accept references to it. We need to ignore the definition here. + if (R->getName() == DefaultModeName) continue; - MRs.erase(I); - break; - } - - for (Record *R : MRs) { Modes.emplace_back(R); ModeIds.insert(std::make_pair(R, Modes.size())); } - std::vector MSs = Records.getAllDerivedDefinitions("HwModeSelect"); - for (Record *R : MSs) { + for (Record *R : Records.getAllDerivedDefinitions("HwModeSelect")) { auto P = ModeSelects.emplace(std::make_pair(R, HwModeSelect(R, *this))); assert(P.second); (void)P; diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn index 2f9db59141183..cfd54004a9ff3 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn @@ -10,6 +10,7 @@ static_library("FlowSensitive") { "DataflowAnalysisContext.cpp", "DataflowEnvironment.cpp", "DebugSupport.cpp", + "Logger.cpp", "Transfer.cpp", "TypeErasedDataflowAnalysis.cpp", "Value.cpp", diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 480e1cd5a89c4..c88db82c10192 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -176,6 +176,7 @@ group("test") { "//llvm/tools/llvm-nm:symlinks", "//llvm/tools/llvm-objcopy:symlinks", "//llvm/tools/llvm-objdump:symlinks", + "//llvm/tools/llvm-pdbutil", "//llvm/tools/llvm-profdata", "//llvm/tools/llvm-rc:symlinks", "//llvm/tools/llvm-readobj:symlinks", diff --git a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn index a3a3966fed26b..22eb6721272ba 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn @@ -23,6 +23,7 @@ unittest("ClangAnalysisFlowSensitiveTests") { "DataflowAnalysisContextTest.cpp", "DataflowEnvironmentTest.cpp", "DebugSupportTest.cpp", + "LoggerTest.cpp", "MapLatticeTest.cpp", "MatchSwitchTest.cpp", 
"MultiVarConstantPropagationTest.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 764f12ce21ec2..8372efe72f7eb 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -369,6 +369,7 @@ if (current_toolchain == default_toolchain) { "__concepts/semiregular.h", "__concepts/swappable.h", "__concepts/totally_ordered.h", + "__condition_variable/condition_variable.h", "__config", "__coroutine/coroutine_handle.h", "__coroutine/coroutine_traits.h", @@ -549,7 +550,10 @@ if (current_toolchain == default_toolchain) { "__memory_resource/pool_options.h", "__memory_resource/synchronized_pool_resource.h", "__memory_resource/unsynchronized_pool_resource.h", - "__mutex_base", + "__mutex/lock_guard.h", + "__mutex/mutex.h", + "__mutex/tag_types.h", + "__mutex/unique_lock.h", "__node_handle", "__numeric/accumulate.h", "__numeric/adjacent_difference.h", diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn index de4074c3bfb64..edd0f2a3539d6 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn @@ -25,6 +25,7 @@ static_library("JITLink") { "EHFrameSupport.cpp", "ELF.cpp", "ELFLinkGraphBuilder.cpp", + "ELF_aarch32.cpp", "ELF_aarch64.cpp", "ELF_i386.cpp", "ELF_loongarch.cpp", @@ -37,6 +38,7 @@ static_library("JITLink") { "MachOLinkGraphBuilder.cpp", "MachO_arm64.cpp", "MachO_x86_64.cpp", + "aarch32.cpp", "aarch64.cpp", "i386.cpp", "loongarch.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn index d0d5225c9d6c9..d0f99ce939cfe 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn @@ -14,6 +14,7 @@ unittest("JITLinkTests") { "//llvm/lib/Testing/Support", ] sources = [ + "AArch32Tests.cpp", "EHFrameSupportTests.cpp", "LinkGraphTests.cpp", ] diff --git a/mlir/docs/Dialects/LLVM.md b/mlir/docs/Dialects/LLVM.md index 53d4dfbf686e7..c41d7254a378c 100644 --- a/mlir/docs/Dialects/LLVM.md +++ b/mlir/docs/Dialects/LLVM.md @@ -105,6 +105,7 @@ values for thread-safety and concept parsimony reasons. Instead, regular values are produced by dedicated operations that have the corresponding semantics: [`llvm.mlir.constant`](#llvmmlirconstant-mlirllvmconstantop), [`llvm.mlir.undef`](#llvmmlirundef-mlirllvmundefop), +[`llvm.mlir.poison`](#llvmmlirpoison-mlirllvmpoisonop), [`llvm.mlir.null`](#llvmmlirnull-mlirllvmnullop). Note how these operations are prefixed with `mlir.` to indicate that they don't belong to LLVM IR but are only necessary to model it in MLIR. The values produced by these operations are diff --git a/mlir/docs/Rationale/UsageOfConst.md b/mlir/docs/Rationale/UsageOfConst.md index 102b948a0eac1..7a54a4e6de7f5 100644 --- a/mlir/docs/Rationale/UsageOfConst.md +++ b/mlir/docs/Rationale/UsageOfConst.md @@ -235,9 +235,9 @@ if (auto *dimOp = inst->dyn_cast()) { It is much better to eliminate them entirely, and just pass around `DimOp` directly. 
For example, instead of: -```C++ +```c++ LogicalResult mlir::getIndexSet(MutableArrayRef> forOps, - FlatAffineConstraints *domain) { + FlatAffineValueConstraints *domain) { ``` @@ -245,7 +245,7 @@ It is a lot nicer to just have: ```c++ LogicalResult mlir::getIndexSet(MutableArrayRef forOps, - FlatAffineConstraints *domain) { + FlatAffineValueConstraints *domain) { ``` Particularly since all of the `FooOp` classes are already semantically a smart diff --git a/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h new file mode 100644 index 0000000000000..abebd7328f823 --- /dev/null +++ b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h @@ -0,0 +1,561 @@ +//===- FlatLinearValueConstraints.h - Linear Constraints --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_FLATLINEARVALUECONSTRAINTS_H +#define MLIR_ANALYSIS_FLATLINEARVALUECONSTRAINTS_H + +#include "mlir/Analysis/Presburger/IntegerRelation.h" +#include "mlir/Analysis/Presburger/Matrix.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/Support/LogicalResult.h" +#include + +namespace mlir { + +class AffineMap; +class IntegerSet; +class MLIRContext; +class Value; +class MemRefType; +struct MutableAffineMap; + +namespace presburger { +class MultiAffineFunction; +} // namespace presburger + +/// FlatLinearConstraints is an extension of IntegerPolyhedron. It provides an +/// AffineExpr-based API. +class FlatLinearConstraints : public presburger::IntegerPolyhedron { +public: + /// Constructs a constraint system reserving memory for the specified number + /// of constraints and variables. `valArgs` are the optional SSA values + /// associated with each dimension/symbol. These must either be empty or match + /// the number of dimensions and symbols. + FlatLinearConstraints(unsigned numReservedInequalities, + unsigned numReservedEqualities, + unsigned numReservedCols, unsigned numDims, + unsigned numSymbols, unsigned numLocals) + : IntegerPolyhedron(numReservedInequalities, numReservedEqualities, + numReservedCols, + presburger::PresburgerSpace::getSetSpace( + numDims, numSymbols, numLocals)) { + assert(numReservedCols >= getNumVars() + 1); + } + + /// Constructs a constraint system with the specified number of dimensions + /// and symbols. `valArgs` are the optional SSA values associated with each + /// dimension/symbol. These must either be empty or match the number of + /// dimensions and symbols. + FlatLinearConstraints(unsigned numDims = 0, unsigned numSymbols = 0, + unsigned numLocals = 0) + : FlatLinearConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals) {} + + FlatLinearConstraints(const IntegerPolyhedron &fac) + : IntegerPolyhedron(fac) {} + + /// Return the kind of this object. + Kind getKind() const override { return Kind::FlatLinearConstraints; } + + static bool classof(const IntegerRelation *cst) { + return cst->getKind() >= Kind::FlatLinearConstraints && + cst->getKind() <= Kind::FlatAffineRelation; + } + + /// Clones this object. 
+ std::unique_ptr clone() const; + + /// Adds a bound for the variable at the specified position with constraints + /// being drawn from the specified bound map. In case of an EQ bound, the + /// bound map is expected to have exactly one result. In case of a LB/UB, the + /// bound map may have more than one result, for each of which an inequality + /// is added. + /// + /// The bound can be added as open or closed by specifying isClosedBound. In + /// case of a LB/UB, isClosedBound = false means the bound is added internally + /// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can + /// only be added as a closed bound. + /// + /// Note: The dimensions/symbols of this FlatLinearConstraints must match the + /// dimensions/symbols of the affine map. + LogicalResult addBound(presburger::BoundType type, unsigned pos, + AffineMap boundMap, bool isClosedBound); + + /// Adds a bound for the variable at the specified position with constraints + /// being drawn from the specified bound map. In case of an EQ bound, the + /// bound map is expected to have exactly one result. In case of a LB/UB, the + /// bound map may have more than one result, for each of which an inequality + /// is added. + /// Note: The dimensions/symbols of this FlatLinearConstraints must match the + /// dimensions/symbols of the affine map. By default the lower bound is closed + /// and the upper bound is open. + LogicalResult addBound(presburger::BoundType type, unsigned pos, + AffineMap boundMap); + + /// The `addBound` overload above hides the inherited overloads by default, so + /// we explicitly introduce them here. + using IntegerPolyhedron::addBound; + + /// Returns the constraint system as an integer set. Returns a null integer + /// set if the system has no constraints, or if an integer set couldn't be + /// constructed as a result of a local variable's explicit representation not + /// being known and such a local variable appearing in any of the constraints. + IntegerSet getAsIntegerSet(MLIRContext *context) const; + + /// Computes the lower and upper bounds of the first `num` dimensional + /// variables (starting at `offset`) as an affine map of the remaining + /// variables (dimensional and symbolic). This method is able to detect + /// variables as floordiv's and mod's of affine expressions of other + /// variables with respect to (positive) constants. Sets bound map to a + /// null AffineMap if such a bound can't be found (or yet unimplemented). + /// + /// By default the returned lower bounds are closed and upper bounds are open. + /// If `closedUb` is true, the upper bound is closed. + void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, + SmallVectorImpl *lbMaps, + SmallVectorImpl *ubMaps, + bool closedUB = false); + + /// Composes an affine map whose dimensions and symbols match one to one with + /// the dimensions and symbols of this FlatLinearConstraints. The results of + /// the map `other` are added as the leading dimensions of this constraint + /// system. Returns failure if `other` is a semi-affine map. + LogicalResult composeMatchingMap(AffineMap other); + + /// Gets the lower and upper bound of the `offset` + `pos`th variable + /// treating [0, offset) U [offset + num, symStartPos) as dimensions and + /// [symStartPos, getNumDimAndSymbolVars) as symbols, and `pos` lies in + /// [0, num). The multi-dimensional maps in the returned pair represent the + /// max and min of potentially multiple affine expressions. 
`localExprs` holds + /// pre-computed AffineExpr's for all local variables in the system. + /// + /// By default the returned lower bounds are closed and upper bounds are open. + /// If `closedUb` is true, the upper bound is closed. + std::pair + getLowerAndUpperBound(unsigned pos, unsigned offset, unsigned num, + unsigned symStartPos, ArrayRef localExprs, + MLIRContext *context, bool closedUB = false) const; + + /// Insert variables of the specified kind at position `pos`. Positions are + /// relative to the kind of variable. The coefficient columns corresponding + /// to the added variables are initialized to zero. `vals` are the Values + /// corresponding to the variables. Values should not be used with + /// VarKind::Local since values can only be attached to non-local variables. + /// Return the absolute column position (i.e., not relative to the kind of + /// variable) of the first added variable. + /// + /// Note: Empty Values are allowed in `vals`. + unsigned insertDimVar(unsigned pos, unsigned num = 1) { + return insertVar(VarKind::SetDim, pos, num); + } + unsigned insertSymbolVar(unsigned pos, unsigned num = 1) { + return insertVar(VarKind::Symbol, pos, num); + } + unsigned insertLocalVar(unsigned pos, unsigned num = 1) { + return insertVar(VarKind::Local, pos, num); + } + + /// Append variables of the specified kind after the last variable of that + /// kind. The coefficient columns corresponding to the added variables are + /// initialized to zero. `vals` are the Values corresponding to the + /// variables. Return the absolute column position (i.e., not relative to the + /// kind of variable) of the first appended variable. + /// + /// Note: Empty Values are allowed in `vals`. + unsigned appendDimVar(unsigned num = 1) { + return appendVar(VarKind::SetDim, num); + } + unsigned appendSymbolVar(unsigned num = 1) { + return appendVar(VarKind::Symbol, num); + } + unsigned appendLocalVar(unsigned num = 1) { + return appendVar(VarKind::Local, num); + } + +protected: + using VarKind = presburger::VarKind; + + /// Compute an explicit representation for local vars. For all systems coming + /// from MLIR integer sets, maps, or expressions where local vars were + /// introduced to model floordivs and mods, this always succeeds. + LogicalResult computeLocalVars(SmallVectorImpl &memo, + MLIRContext *context) const; + + /// Given an affine map that is aligned with this constraint system: + /// * Flatten the map. + /// * Add newly introduced local columns at the beginning of this constraint + /// system (local column pos 0). + /// * Add equalities that define the new local columns to this constraint + /// system. + /// * Return the flattened expressions via `flattenedExprs`. + /// + /// Note: This is a shared helper function of `addLowerOrUpperBound` and + /// `composeMatchingMap`. + LogicalResult flattenAlignedMapAndMergeLocals( + AffineMap map, std::vector> *flattenedExprs); + + /// Prints the number of constraints, dimensions, symbols and locals in the + /// FlatLinearConstraints. Also, prints for each variable whether there is + /// an SSA Value attached to it. + void printSpace(raw_ostream &os) const override; +}; + +/// FlatLinearValueConstraints represents an extension of FlatLinearConstraints +/// where each non-local variable can have an SSA Value attached to it. +class FlatLinearValueConstraints : public FlatLinearConstraints { +public: + /// Constructs a constraint system reserving memory for the specified number + /// of constraints and variables. 
`valArgs` are the optional SSA values + /// associated with each dimension/symbol. These must either be empty or match + /// the number of dimensions and symbols. + FlatLinearValueConstraints(unsigned numReservedInequalities, + unsigned numReservedEqualities, + unsigned numReservedCols, unsigned numDims, + unsigned numSymbols, unsigned numLocals, + ArrayRef> valArgs) + : FlatLinearConstraints(numReservedInequalities, numReservedEqualities, + numReservedCols, numDims, numSymbols, numLocals) { + assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); + values.reserve(numReservedCols); + if (valArgs.empty()) + values.resize(getNumDimAndSymbolVars(), std::nullopt); + else + values.append(valArgs.begin(), valArgs.end()); + } + + /// Constructs a constraint system reserving memory for the specified number + /// of constraints and variables. `valArgs` are the optional SSA values + /// associated with each dimension/symbol. These must either be empty or match + /// the number of dimensions and symbols. + FlatLinearValueConstraints(unsigned numReservedInequalities, + unsigned numReservedEqualities, + unsigned numReservedCols, unsigned numDims, + unsigned numSymbols, unsigned numLocals, + ArrayRef valArgs) + : FlatLinearConstraints(numReservedInequalities, numReservedEqualities, + numReservedCols, numDims, numSymbols, numLocals) { + assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); + values.reserve(numReservedCols); + if (valArgs.empty()) + values.resize(getNumDimAndSymbolVars(), std::nullopt); + else + values.append(valArgs.begin(), valArgs.end()); + } + + /// Constructs a constraint system with the specified number of dimensions + /// and symbols. `valArgs` are the optional SSA values associated with each + /// dimension/symbol. These must either be empty or match the number of + /// dimensions and symbols. + FlatLinearValueConstraints(unsigned numDims, unsigned numSymbols, + unsigned numLocals, + ArrayRef> valArgs) + : FlatLinearValueConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals, valArgs) {} + + /// Constructs a constraint system with the specified number of dimensions + /// and symbols. `valArgs` are the optional SSA values associated with each + /// dimension/symbol. These must either be empty or match the number of + /// dimensions and symbols. + FlatLinearValueConstraints(unsigned numDims = 0, unsigned numSymbols = 0, + unsigned numLocals = 0, + ArrayRef valArgs = {}) + : FlatLinearValueConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals, valArgs) {} + + FlatLinearValueConstraints(const IntegerPolyhedron &fac, + ArrayRef> valArgs = {}) + : FlatLinearConstraints(fac) { + assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); + if (valArgs.empty()) + values.resize(getNumDimAndSymbolVars(), std::nullopt); + else + values.append(valArgs.begin(), valArgs.end()); + } + + /// Creates an affine constraint system from an IntegerSet. + explicit FlatLinearValueConstraints(IntegerSet set, ValueRange operands = {}); + + // Construct a hyperrectangular constraint set from ValueRanges that represent + // induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are + // expected to match one to one. 
The order of variables and constraints is: + // + // ivs | lbs | ubs | eq/ineq + // ----+-----+-----+--------- + // 1 -1 0 >= 0 + // ----+-----+-----+--------- + // -1 0 1 >= 0 + // + // All dimensions as set as VarKind::SetDim. + static FlatLinearValueConstraints + getHyperrectangular(ValueRange ivs, ValueRange lbs, ValueRange ubs); + + /// Return the kind of this object. + Kind getKind() const override { return Kind::FlatLinearValueConstraints; } + + static bool classof(const IntegerRelation *cst) { + return cst->getKind() >= Kind::FlatLinearValueConstraints && + cst->getKind() <= Kind::FlatAffineRelation; + } + + /// Replaces the contents of this FlatLinearValueConstraints with `other`. + void clearAndCopyFrom(const IntegerRelation &other) override; + + /// Adds a constant bound for the variable associated with the given Value. + void addBound(presburger::BoundType type, Value val, int64_t value); + using FlatLinearConstraints::addBound; + + /// Returns the Value associated with the pos^th variable. Asserts if + /// no Value variable was associated. + inline Value getValue(unsigned pos) const { + assert(pos < getNumDimAndSymbolVars() && "Invalid position"); + assert(hasValue(pos) && "variable's Value not set"); + return *values[pos]; + } + + /// Returns the Values associated with variables in range [start, end). + /// Asserts if no Value was associated with one of these variables. + inline void getValues(unsigned start, unsigned end, + SmallVectorImpl *values) const { + assert(end <= getNumDimAndSymbolVars() && "invalid end position"); + assert(start <= end && "invalid start position"); + values->clear(); + values->reserve(end - start); + for (unsigned i = start; i < end; i++) + values->push_back(getValue(i)); + } + inline void getAllValues(SmallVectorImpl *values) const { + getValues(0, getNumDimAndSymbolVars(), values); + } + + inline ArrayRef> getMaybeValues() const { + return {values.data(), values.size()}; + } + + inline ArrayRef> + getMaybeValues(presburger::VarKind kind) const { + assert(kind != VarKind::Local && + "Local variables do not have any value attached to them."); + return {values.data() + getVarKindOffset(kind), getNumVarKind(kind)}; + } + + /// Returns true if the pos^th variable has an associated Value. + inline bool hasValue(unsigned pos) const { + assert(pos < getNumDimAndSymbolVars() && "Invalid position"); + return values[pos].has_value(); + } + + /// Returns true if at least one variable has an associated Value. + bool hasValues() const; + + unsigned appendDimVar(ValueRange vals); + using FlatLinearConstraints::appendDimVar; + + unsigned appendSymbolVar(ValueRange vals); + using FlatLinearConstraints::appendSymbolVar; + + unsigned insertDimVar(unsigned pos, ValueRange vals); + using FlatLinearConstraints::insertDimVar; + + unsigned insertSymbolVar(unsigned pos, ValueRange vals); + using FlatLinearConstraints::insertSymbolVar; + + unsigned insertVar(presburger::VarKind kind, unsigned pos, + unsigned num = 1) override; + unsigned insertVar(presburger::VarKind kind, unsigned pos, ValueRange vals); + + /// Removes variables in the column range [varStart, varLimit), and copies any + /// remaining valid data into place, updates member variables, and resizes + /// arrays as needed. + void removeVarRange(presburger::VarKind kind, unsigned varStart, + unsigned varLimit) override; + using IntegerPolyhedron::removeVarRange; + + /// Sets the Value associated with the pos^th variable. 
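The Value-attachment API above (constructors taking `valArgs`, `addBound` keyed by a `Value`, `findVar`) is what distinguishes `FlatLinearValueConstraints` from plain `FlatLinearConstraints`. A minimal usage sketch, assuming `iv0` and `iv1` are SSA `Value`s (for example, loop induction variables) obtained elsewhere; illustrative only, not code from the patch.

```c++
#include "mlir/Analysis/FlatLinearValueConstraints.h"

using namespace mlir;

// Assumes iv0/iv1 are existing SSA Values, e.g. loop induction variables.
void constrainIVs(Value iv0, Value iv1) {
  // Two dimensions, no symbols or locals; attach the Values up front.
  FlatLinearValueConstraints cst(/*numDims=*/2, /*numSymbols=*/0,
                                 /*numLocals=*/0, /*valArgs=*/{iv0, iv1});

  // Constant bounds can be keyed by Value instead of by column position...
  cst.addBound(presburger::BoundType::LB, iv0, 0);
  cst.addBound(presburger::BoundType::UB, iv0, 127);

  // ...or the column can be recovered first and the positional overload used.
  unsigned pos;
  if (cst.findVar(iv1, &pos))
    cst.addBound(presburger::BoundType::EQ, pos, 42);
}
```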
+ inline void setValue(unsigned pos, Value val) { + assert(pos < getNumDimAndSymbolVars() && "invalid var position"); + values[pos] = val; + } + + /// Sets the Values associated with the variables in the range [start, end). + /// The range must contain only dim and symbol variables. + void setValues(unsigned start, unsigned end, ArrayRef values) { + assert(end <= getNumVars() && "invalid end position"); + assert(start <= end && "invalid start position"); + assert(values.size() == end - start && + "value should be provided for each variable in the range."); + for (unsigned i = start; i < end; ++i) + setValue(i, values[i - start]); + } + + /// Looks up the position of the variable with the specified Value. Returns + /// true if found (false otherwise). `pos` is set to the (column) position of + /// the variable. + bool findVar(Value val, unsigned *pos) const; + + /// Returns true if a variable with the specified Value exists, false + /// otherwise. + bool containsVar(Value val) const; + + /// Projects out the variable that is associate with Value. + void projectOut(Value val); + using IntegerPolyhedron::projectOut; + + /// Swap the posA^th variable with the posB^th variable. + void swapVar(unsigned posA, unsigned posB) override; + + /// Prints the number of constraints, dimensions, symbols and locals in the + /// FlatAffineValueConstraints. Also, prints for each variable whether there + /// is an SSA Value attached to it. + void printSpace(raw_ostream &os) const override; + + /// Align `map` with this constraint system based on `operands`. Each operand + /// must already have a corresponding dim/symbol in this constraint system. + AffineMap computeAlignedMap(AffineMap map, ValueRange operands) const; + + /// Merge and align the variables of `this` and `other` starting at + /// `offset`, so that both constraint systems get the union of the contained + /// variables that is dimension-wise and symbol-wise unique; both + /// constraint systems are updated so that they have the union of all + /// variables, with `this`'s original variables appearing first followed + /// by any of `other`'s variables that didn't appear in `this`. Local + /// variables in `other` that have the same division representation as local + /// variables in `this` are merged into one. + // E.g.: Input: `this` has (%i, %j) [%M, %N] + // `other` has (%k, %j) [%P, %N, %M] + // Output: both `this`, `other` have (%i, %j, %k) [%M, %N, %P] + // + void mergeAndAlignVarsWithOther(unsigned offset, + FlatLinearValueConstraints *other); + + /// Merge and align symbols of `this` and `other` such that both get union of + /// of symbols that are unique. Symbols in `this` and `other` should be + /// unique. Symbols with Value as `None` are considered to be inequal to all + /// other symbols. + void mergeSymbolVars(FlatLinearValueConstraints &other); + + /// Returns true if this constraint system and `other` are in the same + /// space, i.e., if they are associated with the same set of variables, + /// appearing in the same order. Returns false otherwise. + bool areVarsAlignedWithOther(const FlatLinearConstraints &other); + + /// Updates the constraints to be the smallest bounding (enclosing) box that + /// contains the points of `this` set and that of `other`, with the symbols + /// being treated specially. For each of the dimensions, the min of the lower + /// bounds (symbolic) and the max of the upper bounds (symbolic) is computed + /// to determine such a bounding box. 
`other` is expected to have the same + /// dimensional variables as this constraint system (in the same order). + /// + /// E.g.: + /// 1) this = {0 <= d0 <= 127}, + /// other = {16 <= d0 <= 192}, + /// output = {0 <= d0 <= 192} + /// 2) this = {s0 + 5 <= d0 <= s0 + 20}, + /// other = {s0 + 1 <= d0 <= s0 + 9}, + /// output = {s0 + 1 <= d0 <= s0 + 20} + /// 3) this = {0 <= d0 <= 5, 1 <= d1 <= 9} + /// other = {2 <= d0 <= 6, 5 <= d1 <= 15}, + /// output = {0 <= d0 <= 6, 1 <= d1 <= 15} + LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other); + using IntegerPolyhedron::unionBoundingBox; + +protected: + /// Eliminates the variable at the specified position using Fourier-Motzkin + /// variable elimination, but uses Gaussian elimination if there is an + /// equality involving that variable. If the result of the elimination is + /// integer exact, `*isResultIntegerExact` is set to true. If `darkShadow` is + /// set to true, a potential under approximation (subset) of the rational + /// shadow / exact integer shadow is computed. + // See implementation comments for more details. + void fourierMotzkinEliminate(unsigned pos, bool darkShadow = false, + bool *isResultIntegerExact = nullptr) override; + + /// Returns false if the fields corresponding to various variable counts, or + /// equality/inequality buffer sizes aren't consistent; true otherwise. This + /// is meant to be used within an assert internally. + bool hasConsistentState() const override; + + /// Values corresponding to the (column) non-local variables of this + /// constraint system appearing in the order the variables correspond to + /// columns. Variables that aren't associated with any Value are set to + /// None. + SmallVector, 8> values; +}; + +/// Flattens 'expr' into 'flattenedExpr', which contains the coefficients of the +/// dimensions, symbols, and additional variables that represent floor divisions +/// of dimensions, symbols, and in turn other floor divisions. Returns failure +/// if 'expr' could not be flattened (i.e., semi-affine is not yet handled). +/// 'cst' contains constraints that connect newly introduced local variables +/// to existing dimensional and symbolic variables. See documentation for +/// AffineExprFlattener on how mod's and div's are flattened. +LogicalResult getFlattenedAffineExpr(AffineExpr expr, unsigned numDims, + unsigned numSymbols, + SmallVectorImpl *flattenedExpr, + FlatLinearConstraints *cst = nullptr); + +/// Flattens the result expressions of the map to their corresponding flattened +/// forms and set in 'flattenedExprs'. Returns failure if any expression in the +/// map could not be flattened (i.e., semi-affine is not yet handled). 'cst' +/// contains constraints that connect newly introduced local variables to +/// existing dimensional and / symbolic variables. See documentation for +/// AffineExprFlattener on how mod's and div's are flattened. For all affine +/// expressions that share the same operands (like those of an affine map), this +/// method should be used instead of repeatedly calling getFlattenedAffineExpr +/// since local variables added to deal with div's and mod's will be reused +/// across expressions. 
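The flattening helpers documented above turn affine expressions into coefficient rows, introducing local variables for `floordiv`/`mod` and recording their defining constraints in the optional `FlatLinearConstraints` output. A rough sketch of the single-expression form, assuming the flattened coefficients are `int64_t` (as in the declarations that follow); not code from the patch.

```c++
#include "mlir/Analysis/FlatLinearValueConstraints.h"
#include "mlir/IR/AffineExpr.h"

using namespace mlir;

// Flatten d0 + d1 floordiv 4 over two dimensions and no symbols.
void flattenExample(MLIRContext *ctx) {
  AffineExpr d0, d1;
  bindDims(ctx, d0, d1);
  AffineExpr expr = d0 + d1.floorDiv(4);

  SmallVector<int64_t, 8> flattened;
  FlatLinearConstraints localCst;
  if (failed(getFlattenedAffineExpr(expr, /*numDims=*/2, /*numSymbols=*/0,
                                    &flattened, &localCst)))
    return;
  // `flattened` now holds coefficients for d0, d1, the local variable
  // introduced for the floordiv, and the constant term; `localCst` carries
  // the constraints tying that local variable to d1.
}
```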
+LogicalResult +getFlattenedAffineExprs(AffineMap map, + std::vector> *flattenedExprs, + FlatLinearConstraints *cst = nullptr); +LogicalResult +getFlattenedAffineExprs(IntegerSet set, + std::vector> *flattenedExprs, + FlatLinearConstraints *cst = nullptr); + +LogicalResult +getMultiAffineFunctionFromMap(AffineMap map, + presburger::MultiAffineFunction &multiAff); + +/// Re-indexes the dimensions and symbols of an affine map with given `operands` +/// values to align with `dims` and `syms` values. +/// +/// Each dimension/symbol of the map, bound to an operand `o`, is replaced with +/// dimension `i`, where `i` is the position of `o` within `dims`. If `o` is not +/// in `dims`, replace it with symbol `i`, where `i` is the position of `o` +/// within `syms`. If `o` is not in `syms` either, replace it with a new symbol. +/// +/// Note: If a value appears multiple times as a dimension/symbol (or both), all +/// corresponding dim/sym expressions are replaced with the first dimension +/// bound to that value (or first symbol if no such dimension exists). +/// +/// The resulting affine map has `dims.size()` many dimensions and at least +/// `syms.size()` many symbols. +/// +/// The SSA values of the symbols of the resulting map are optionally returned +/// via `newSyms`. This is a concatenation of `syms` with the SSA values of the +/// newly added symbols. +/// +/// Note: As part of this re-indexing, dimensions may turn into symbols, or vice +/// versa. +AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, + ValueRange dims, ValueRange syms, + SmallVector *newSyms = nullptr); + +} // namespace mlir + +#endif // MLIR_ANALYSIS_FLATLINEARVALUECONSTRAINTS_H diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index 347be26325e5a..9646894736de0 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -31,6 +31,9 @@ class PresburgerSet; class PresburgerRelation; struct SymbolicLexMin; +/// The type of bound: equal, lower bound or upper bound. +enum class BoundType { EQ, LB, UB }; + /// An IntegerRelation represents the set of points from a PresburgerSpace that /// satisfy a list of affine constraints. Affine constraints can be inequalities /// or equalities in the form: @@ -54,10 +57,12 @@ class IntegerRelation { public: /// All derived classes of IntegerRelation. enum class Kind { - FlatAffineConstraints, - FlatAffineValueConstraints, IntegerRelation, IntegerPolyhedron, + FlatLinearConstraints, + FlatLinearValueConstraints, + FlatAffineValueConstraints, + FlatAffineRelation }; /// Constructs a relation reserving memory for the specified number @@ -395,9 +400,6 @@ class IntegerRelation { /// to None. DivisionRepr getLocalReprs(std::vector *repr = nullptr) const; - /// The type of bound: equal, lower bound or upper bound. - enum BoundType { EQ, LB, UB }; - /// Adds a constant bound for the specified variable. void addBound(BoundType type, unsigned pos, const MPInt &value); void addBound(BoundType type, unsigned pos, int64_t value) { @@ -848,7 +850,8 @@ class IntegerPolyhedron : public IntegerRelation { Kind getKind() const override { return Kind::IntegerPolyhedron; } static bool classof(const IntegerRelation *cst) { - return cst->getKind() == Kind::IntegerPolyhedron; + return cst->getKind() >= Kind::IntegerPolyhedron && + cst->getKind() <= Kind::FlatAffineRelation; } // Clones this object. 
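Both the new header and the `IntegerRelation.h` hunk above lean on LLVM-style RTTI with ranged `Kind` checks: a class's `classof` accepts its own kind plus every kind of its descendants, which only works because the derived kinds are laid out contiguously in the enum. A standalone toy version of the pattern (placeholder class names, not the MLIR types):

```c++
#include "llvm/Support/Casting.h"

class Base {
public:
  // Kinds of a subtree must stay contiguous for the range checks below.
  enum class Kind { Base, Mid, Derived };
  explicit Base(Kind k) : TheKind(k) {}
  virtual ~Base() = default;
  Kind getKind() const { return TheKind; }

private:
  Kind TheKind;
};

class Mid : public Base {
public:
  Mid() : Base(Kind::Mid) {}
  // Accepts Mid and everything derived from it, hence the range check.
  static bool classof(const Base *b) {
    return b->getKind() >= Kind::Mid && b->getKind() <= Kind::Derived;
  }

protected:
  explicit Mid(Kind k) : Base(k) {}
};

class Derived : public Mid {
public:
  Derived() : Mid(Kind::Derived) {}
  static bool classof(const Base *b) { return b->getKind() == Kind::Derived; }
};

// isa<Mid> is now true for Derived as well, mirroring how
// isa<IntegerPolyhedron> holds for FlatAffineValueConstraints.
bool isMidLike(const Base &b) { return llvm::isa<Mid>(b); }
```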
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h index 1b302f55422d8..e59836444cc19 100644 --- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h @@ -13,6 +13,7 @@ #ifndef MLIR_DIALECT_AFFINE_ANALYSIS_AFFINESTRUCTURES_H #define MLIR_DIALECT_AFFINE_ANALYSIS_AFFINESTRUCTURES_H +#include "mlir/Analysis/FlatLinearValueConstraints.h" #include "mlir/Analysis/Presburger/IntegerRelation.h" #include "mlir/Analysis/Presburger/Matrix.h" #include "mlir/IR/AffineExpr.h" @@ -38,117 +39,20 @@ namespace presburger { class MultiAffineFunction; } // namespace presburger -/// FlatAffineValueConstraints represents an extension of IntegerPolyhedron -/// where each non-local variable can have an SSA Value attached to it. -class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { +/// FlatAffineValueConstraints is an extension of FlatLinearValueConstraints +/// with helper functions for Affine dialect ops. +class FlatAffineValueConstraints : public FlatLinearValueConstraints { public: - /// Constructs a constraint system reserving memory for the specified number - /// of constraints and variables. `valArgs` are the optional SSA values - /// associated with each dimension/symbol. These must either be empty or match - /// the number of dimensions and symbols. - FlatAffineValueConstraints(unsigned numReservedInequalities, - unsigned numReservedEqualities, - unsigned numReservedCols, unsigned numDims, - unsigned numSymbols, unsigned numLocals, - ArrayRef> valArgs) - : IntegerPolyhedron(numReservedInequalities, numReservedEqualities, - numReservedCols, - presburger::PresburgerSpace::getSetSpace( - numDims, numSymbols, numLocals)) { - assert(numReservedCols >= getNumVars() + 1); - assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); - values.reserve(numReservedCols); - if (valArgs.empty()) - values.resize(getNumDimAndSymbolVars(), std::nullopt); - else - values.append(valArgs.begin(), valArgs.end()); - } - - /// Constructs a constraint system reserving memory for the specified number - /// of constraints and variables. `valArgs` are the optional SSA values - /// associated with each dimension/symbol. These must either be empty or match - /// the number of dimensions and symbols. - FlatAffineValueConstraints(unsigned numReservedInequalities, - unsigned numReservedEqualities, - unsigned numReservedCols, unsigned numDims, - unsigned numSymbols, unsigned numLocals, - ArrayRef valArgs = {}) - : IntegerPolyhedron(numReservedInequalities, numReservedEqualities, - numReservedCols, - presburger::PresburgerSpace::getSetSpace( - numDims, numSymbols, numLocals)) { - assert(numReservedCols >= getNumVars() + 1); - assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); - values.reserve(numReservedCols); - if (valArgs.empty()) - values.resize(getNumDimAndSymbolVars(), std::nullopt); - else - values.append(valArgs.begin(), valArgs.end()); - } + using FlatLinearValueConstraints::FlatLinearValueConstraints; - /// Constructs a constraint system with the specified number of dimensions - /// and symbols. `valArgs` are the optional SSA values associated with each - /// dimension/symbol. These must either be empty or match the number of - /// dimensions and symbols. 
- FlatAffineValueConstraints(unsigned numDims, unsigned numSymbols, - unsigned numLocals, - ArrayRef> valArgs) - : FlatAffineValueConstraints(/*numReservedInequalities=*/0, - /*numReservedEqualities=*/0, - /*numReservedCols=*/numDims + numSymbols + - numLocals + 1, - numDims, numSymbols, numLocals, valArgs) {} - - /// Constructs a constraint system with the specified number of dimensions - /// and symbols. `valArgs` are the optional SSA values associated with each - /// dimension/symbol. These must either be empty or match the number of - /// dimensions and symbols. - FlatAffineValueConstraints(unsigned numDims = 0, unsigned numSymbols = 0, - unsigned numLocals = 0, - ArrayRef valArgs = {}) - : FlatAffineValueConstraints(/*numReservedInequalities=*/0, - /*numReservedEqualities=*/0, - /*numReservedCols=*/numDims + numSymbols + - numLocals + 1, - numDims, numSymbols, numLocals, valArgs) {} - - FlatAffineValueConstraints(const IntegerPolyhedron &fac, - ArrayRef> valArgs = {}) - : IntegerPolyhedron(fac) { - assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); - if (valArgs.empty()) - values.resize(getNumDimAndSymbolVars(), std::nullopt); - else - values.append(valArgs.begin(), valArgs.end()); - } - - /// Creates an affine constraint system from an IntegerSet. - explicit FlatAffineValueConstraints(IntegerSet set, ValueRange operands = {}); - - // Construct a hyperrectangular constraint set from ValueRanges that represent - // induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are - // expected to match one to one. The order of variables and constraints is: - // - // ivs | lbs | ubs | eq/ineq - // ----+-----+-----+--------- - // 1 -1 0 >= 0 - // ----+-----+-----+--------- - // -1 0 1 >= 0 - // - // All dimensions as set as VarKind::SetDim. - static FlatAffineValueConstraints - getHyperrectangular(ValueRange ivs, ValueRange lbs, ValueRange ubs); - - /// Return the kind of this FlatAffineConstraints. + /// Return the kind of this object. Kind getKind() const override { return Kind::FlatAffineValueConstraints; } static bool classof(const IntegerRelation *cst) { - return cst->getKind() == Kind::FlatAffineValueConstraints; + return cst->getKind() >= Kind::FlatAffineValueConstraints && + cst->getKind() <= Kind::FlatAffineRelation; } - /// Clones this object. - std::unique_ptr clone() const; - /// Adds constraints (lower and upper bounds) for the specified 'affine.for' /// operation's Value using IR information stored in its bound maps. The /// right variable is first looked up using `forOp`'s Value. Asserts if the @@ -191,95 +95,22 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { /// the columns in the current one regarding numbers and values. void addAffineIfOpDomain(AffineIfOp ifOp); - /// Adds a bound for the variable at the specified position with constraints - /// being drawn from the specified bound map. In case of an EQ bound, the - /// bound map is expected to have exactly one result. In case of a LB/UB, the - /// bound map may have more than one result, for each of which an inequality - /// is added. - /// - /// The bound can be added as open or closed by specifying isClosedBound. In - /// case of a LB/UB, isClosedBound = false means the bound is added internally - /// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can - /// only be added as a closed bound. - /// - /// Note: The dimensions/symbols of this FlatAffineConstraints must match the - /// dimensions/symbols of the affine map. 
- LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, - bool isClosedBound); - - /// Adds a bound for the variable at the specified position with constraints - /// being drawn from the specified bound map. In case of an EQ bound, the - /// bound map is expected to have exactly one result. In case of a LB/UB, the - /// bound map may have more than one result, for each of which an inequality - /// is added. - /// Note: The dimensions/symbols of this FlatAffineConstraints must match the - /// dimensions/symbols of the affine map. By default the lower bound is closed - /// and the upper bound is open. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap); - /// Adds a bound for the variable at the specified position with constraints /// being drawn from the specified bound map and operands. In case of an /// EQ bound, the bound map is expected to have exactly one result. In case /// of a LB/UB, the bound map may have more than one result, for each of which /// an inequality is added. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, - ValueRange operands); - - /// Adds a constant bound for the variable associated with the given Value. - void addBound(BoundType type, Value val, int64_t value); - - /// The `addBound` overload above hides the inherited overloads by default, so - /// we explicitly introduce them here. - using IntegerPolyhedron::addBound; - - /// Returns the constraint system as an integer set. Returns a null integer - /// set if the system has no constraints, or if an integer set couldn't be - /// constructed as a result of a local variable's explicit representation not - /// being known and such a local variable appearing in any of the constraints. - IntegerSet getAsIntegerSet(MLIRContext *context) const; - - /// Computes the lower and upper bounds of the first `num` dimensional - /// variables (starting at `offset`) as an affine map of the remaining - /// variables (dimensional and symbolic). This method is able to detect - /// variables as floordiv's and mod's of affine expressions of other - /// variables with respect to (positive) constants. Sets bound map to a - /// null AffineMap if such a bound can't be found (or yet unimplemented). - /// - /// By default the returned lower bounds are closed and upper bounds are open. - /// If `closedUb` is true, the upper bound is closed. - void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, - SmallVectorImpl *lbMaps, - SmallVectorImpl *ubMaps, - bool closedUB = false); - - /// Composes an affine map whose dimensions and symbols match one to one with - /// the dimensions and symbols of this FlatAffineConstraints. The results of - /// the map `other` are added as the leading dimensions of this constraint - /// system. Returns failure if `other` is a semi-affine map. - LogicalResult composeMatchingMap(AffineMap other); - - /// Gets the lower and upper bound of the `offset` + `pos`th variable - /// treating [0, offset) U [offset + num, symStartPos) as dimensions and - /// [symStartPos, getNumDimAndSymbolVars) as symbols, and `pos` lies in - /// [0, num). The multi-dimensional maps in the returned pair represent the - /// max and min of potentially multiple affine expressions. `localExprs` holds - /// pre-computed AffineExpr's for all local variables in the system. - /// - /// By default the returned lower bounds are closed and upper bounds are open. - /// If `closedUb` is true, the upper bound is closed. 
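With the members above migrated to the `FlatLinear*` base classes, `FlatAffineValueConstraints` retains only the Affine-dialect entry points (such as `addAffineForOpDomain` and the `addBound` overload taking operands), while generic queries like `getSliceBounds` are inherited. A rough usage sketch, assuming `forOp` is an `AffineForOp` taken from surrounding IR; illustrative only.

```c++
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;

void sketchLoopBounds(AffineForOp forOp) {
  FlatAffineValueConstraints cst;
  // Affine-dialect-specific: pull the loop's bound maps into the system.
  if (failed(cst.addAffineForOpDomain(forOp)))
    return;

  // Generic, inherited from FlatLinearConstraints: recover symbolic bounds.
  SmallVector<AffineMap, 4> lbs, ubs;
  cst.getSliceBounds(/*offset=*/0, /*num=*/cst.getNumDimVars(),
                     forOp.getContext(), &lbs, &ubs);
}
```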
- std::pair - getLowerAndUpperBound(unsigned pos, unsigned offset, unsigned num, - unsigned symStartPos, ArrayRef localExprs, - MLIRContext *context, bool closedUB = false) const; + LogicalResult addBound(presburger::BoundType type, unsigned pos, + AffineMap boundMap, ValueRange operands); + using FlatLinearValueConstraints::addBound; - /// Returns the bound for the variable at `pos` from the inequality at - /// `ineqPos` as a 1-d affine value map (affine map + operands). The returned - /// affine value map can either be a lower bound or an upper bound depending - /// on the sign of atIneq(ineqPos, pos). Asserts if the row at `ineqPos` does - /// not involve the `pos`th variable. - void getIneqAsAffineValueMap(unsigned pos, unsigned ineqPos, - AffineValueMap &vmap, - MLIRContext *context) const; + /// Add the specified values as a dim or symbol var depending on its nature, + /// if it already doesn't exist in the system. `val` has to be either a + /// terminal symbol or a loop IV, i.e., it cannot be the result affine.apply + /// of any symbols or loop IVs. The variable is added to the end of the + /// existing dims or symbols. Additional information on the variable is + /// extracted from the IR and added to the constraint system. + void addInductionVarOrTerminalSymbol(Value val); /// Adds slice lower bounds represented by lower bounds in `lbMaps` and upper /// bounds in `ubMaps` to each variable in the constraint system which has @@ -292,79 +123,17 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { ArrayRef ubMaps, ArrayRef operands); - /// Looks up the position of the variable with the specified Value. Returns - /// true if found (false otherwise). `pos` is set to the (column) position of - /// the variable. - bool findVar(Value val, unsigned *pos) const; - - /// Returns true if an variable with the specified Value exists, false - /// otherwise. - bool containsVar(Value val) const; - - /// Swap the posA^th variable with the posB^th variable. - void swapVar(unsigned posA, unsigned posB) override; - - /// Insert variables of the specified kind at position `pos`. Positions are - /// relative to the kind of variable. The coefficient columns corresponding - /// to the added variables are initialized to zero. `vals` are the Values - /// corresponding to the variables. Values should not be used with - /// VarKind::Local since values can only be attached to non-local variables. - /// Return the absolute column position (i.e., not relative to the kind of - /// variable) of the first added variable. - /// - /// Note: Empty Values are allowed in `vals`. - unsigned insertDimVar(unsigned pos, unsigned num = 1) { - return insertVar(VarKind::SetDim, pos, num); - } - unsigned insertSymbolVar(unsigned pos, unsigned num = 1) { - return insertVar(VarKind::Symbol, pos, num); - } - unsigned insertLocalVar(unsigned pos, unsigned num = 1) { - return insertVar(VarKind::Local, pos, num); - } - unsigned insertDimVar(unsigned pos, ValueRange vals); - unsigned insertSymbolVar(unsigned pos, ValueRange vals); - unsigned insertVar(presburger::VarKind kind, unsigned pos, - unsigned num = 1) override; - unsigned insertVar(presburger::VarKind kind, unsigned pos, ValueRange vals); - - /// Append variables of the specified kind after the last variable of that - /// kind. The coefficient columns corresponding to the added variables are - /// initialized to zero. `vals` are the Values corresponding to the - /// variables. 
Return the absolute column position (i.e., not relative to the - /// kind of variable) of the first appended variable. - /// - /// Note: Empty Values are allowed in `vals`. - unsigned appendDimVar(ValueRange vals); - unsigned appendSymbolVar(ValueRange vals); - unsigned appendDimVar(unsigned num = 1) { - return appendVar(VarKind::SetDim, num); - } - unsigned appendSymbolVar(unsigned num = 1) { - return appendVar(VarKind::Symbol, num); - } - unsigned appendLocalVar(unsigned num = 1) { - return appendVar(VarKind::Local, num); - } - - /// Removes variables in the column range [varStart, varLimit), and copies any - /// remaining valid data into place, updates member variables, and resizes - /// arrays as needed. - void removeVarRange(presburger::VarKind kind, unsigned varStart, - unsigned varLimit) override; - using IntegerPolyhedron::removeVarRange; - - /// Add the specified values as a dim or symbol var depending on its nature, - /// if it already doesn't exist in the system. `val` has to be either a - /// terminal symbol or a loop IV, i.e., it cannot be the result affine.apply - /// of any symbols or loop IVs. The variable is added to the end of the - /// existing dims or symbols. Additional information on the variable is - /// extracted from the IR and added to the constraint system. - void addInductionVarOrTerminalSymbol(Value val); + /// Changes all symbol variables which are loop IVs to dim variables. + void convertLoopIVSymbolsToDims(); - /// Align `map` with this constraint system based on `operands`. Each operand - /// must already have a corresponding dim/symbol in this constraint system. - AffineMap computeAlignedMap(AffineMap map, ValueRange operands) const; + /// Returns the bound for the variable at `pos` from the inequality at + /// `ineqPos` as a 1-d affine value map (affine map + operands). The returned + /// affine value map can either be a lower bound or an upper bound depending + /// on the sign of atIneq(ineqPos, pos). Asserts if the row at `ineqPos` does + /// not involve the `pos`th variable. + void getIneqAsAffineValueMap(unsigned pos, unsigned ineqPos, + AffineValueMap &vmap, + MLIRContext *context) const; /// Composes the affine value map with this FlatAffineValueConstrains, adding /// the results of the map as dimensions at the front @@ -373,168 +142,10 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { /// /// Returns failure if the composition fails (when vMap is a semi-affine map). /// The vMap's operand Value's are used to look up the right positions in - /// the FlatAffineConstraints with which to associate. Every operand of vMap - /// should have a matching dim/symbol column in this constraint system (with - /// the same associated Value). + /// the FlatAffineValueConstraints with which to associate. Every operand of + /// vMap should have a matching dim/symbol column in this constraint system + /// (with the same associated Value). LogicalResult composeMap(const AffineValueMap *vMap); - - /// Projects out the variable that is associate with Value. - void projectOut(Value val); - using IntegerPolyhedron::projectOut; - - /// Changes all symbol variables which are loop IVs to dim variables. - void convertLoopIVSymbolsToDims(); - - /// Updates the constraints to be the smallest bounding (enclosing) box that - /// contains the points of `this` set and that of `other`, with the symbols - /// being treated specially. 
For each of the dimensions, the min of the lower - /// bounds (symbolic) and the max of the upper bounds (symbolic) is computed - /// to determine such a bounding box. `other` is expected to have the same - /// dimensional variables as this constraint system (in the same order). - /// - /// E.g.: - /// 1) this = {0 <= d0 <= 127}, - /// other = {16 <= d0 <= 192}, - /// output = {0 <= d0 <= 192} - /// 2) this = {s0 + 5 <= d0 <= s0 + 20}, - /// other = {s0 + 1 <= d0 <= s0 + 9}, - /// output = {s0 + 1 <= d0 <= s0 + 20} - /// 3) this = {0 <= d0 <= 5, 1 <= d1 <= 9} - /// other = {2 <= d0 <= 6, 5 <= d1 <= 15}, - /// output = {0 <= d0 <= 6, 1 <= d1 <= 15} - LogicalResult unionBoundingBox(const FlatAffineValueConstraints &other); - using IntegerPolyhedron::unionBoundingBox; - - /// Merge and align the variables of `this` and `other` starting at - /// `offset`, so that both constraint systems get the union of the contained - /// variables that is dimension-wise and symbol-wise unique; both - /// constraint systems are updated so that they have the union of all - /// variables, with `this`'s original variables appearing first followed - /// by any of `other`'s variables that didn't appear in `this`. Local - /// variables in `other` that have the same division representation as local - /// variables in `this` are merged into one. - // E.g.: Input: `this` has (%i, %j) [%M, %N] - // `other` has (%k, %j) [%P, %N, %M] - // Output: both `this`, `other` have (%i, %j, %k) [%M, %N, %P] - // - void mergeAndAlignVarsWithOther(unsigned offset, - FlatAffineValueConstraints *other); - - /// Returns true if this constraint system and `other` are in the same - /// space, i.e., if they are associated with the same set of variables, - /// appearing in the same order. Returns false otherwise. - bool areVarsAlignedWithOther(const FlatAffineValueConstraints &other); - - /// Replaces the contents of this FlatAffineValueConstraints with `other`. - void clearAndCopyFrom(const IntegerRelation &other) override; - - /// Returns the Value associated with the pos^th variable. Asserts if - /// no Value variable was associated. - inline Value getValue(unsigned pos) const { - assert(pos < getNumDimAndSymbolVars() && "Invalid position"); - assert(hasValue(pos) && "variable's Value not set"); - return *values[pos]; - } - - /// Returns true if the pos^th variable has an associated Value. - inline bool hasValue(unsigned pos) const { - assert(pos < getNumDimAndSymbolVars() && "Invalid position"); - return values[pos].has_value(); - } - - /// Returns true if at least one variable has an associated Value. - bool hasValues() const; - - /// Returns the Values associated with variables in range [start, end). - /// Asserts if no Value was associated with one of these variables. 
- inline void getValues(unsigned start, unsigned end, - SmallVectorImpl *values) const { - assert(end <= getNumDimAndSymbolVars() && "invalid end position"); - assert(start <= end && "invalid start position"); - values->clear(); - values->reserve(end - start); - for (unsigned i = start; i < end; i++) - values->push_back(getValue(i)); - } - inline void getAllValues(SmallVectorImpl *values) const { - getValues(0, getNumDimAndSymbolVars(), values); - } - - inline ArrayRef> getMaybeValues() const { - return {values.data(), values.size()}; - } - - inline ArrayRef> - getMaybeValues(presburger::VarKind kind) const { - assert(kind != VarKind::Local && - "Local variables do not have any value attached to them."); - return {values.data() + getVarKindOffset(kind), getNumVarKind(kind)}; - } - - /// Sets the Value associated with the pos^th variable. - inline void setValue(unsigned pos, Value val) { - assert(pos < getNumDimAndSymbolVars() && "invalid var position"); - values[pos] = val; - } - - /// Sets the Values associated with the variables in the range [start, end). - /// The range must contain only dim and symbol variables. - void setValues(unsigned start, unsigned end, ArrayRef values) { - assert(end <= getNumVars() && "invalid end position"); - assert(start <= end && "invalid start position"); - assert(values.size() == end - start && - "value should be provided for each variable in the range."); - for (unsigned i = start; i < end; ++i) - setValue(i, values[i - start]); - } - - /// Merge and align symbols of `this` and `other` such that both get union of - /// of symbols that are unique. Symbols in `this` and `other` should be - /// unique. Symbols with Value as `None` are considered to be inequal to all - /// other symbols. - void mergeSymbolVars(FlatAffineValueConstraints &other); - -protected: - using VarKind = presburger::VarKind; - - /// Returns false if the fields corresponding to various variable counts, or - /// equality/inequality buffer sizes aren't consistent; true otherwise. This - /// is meant to be used within an assert internally. - bool hasConsistentState() const override; - - /// Given an affine map that is aligned with this constraint system: - /// * Flatten the map. - /// * Add newly introduced local columns at the beginning of this constraint - /// system (local column pos 0). - /// * Add equalities that define the new local columns to this constraint - /// system. - /// * Return the flattened expressions via `flattenedExprs`. - /// - /// Note: This is a shared helper function of `addLowerOrUpperBound` and - /// `composeMatchingMap`. - LogicalResult flattenAlignedMapAndMergeLocals( - AffineMap map, std::vector> *flattenedExprs); - - /// Eliminates the variable at the specified position using Fourier-Motzkin - /// variable elimination, but uses Gaussian elimination if there is an - /// equality involving that variable. If the result of the elimination is - /// integer exact, `*isResultIntegerExact` is set to true. If `darkShadow` is - /// set to true, a potential under approximation (subset) of the rational - /// shadow / exact integer shadow is computed. - // See implementation comments for more details. - void fourierMotzkinEliminate(unsigned pos, bool darkShadow = false, - bool *isResultIntegerExact = nullptr) override; - - /// Prints the number of constraints, dimensions, symbols and locals in the - /// FlatAffineConstraints. Also, prints for each variable whether there is - /// an SSA Value attached to it. 
- void printSpace(raw_ostream &os) const override; - - /// Values corresponding to the (column) non-local variables of this - /// constraint system appearing in the order the variables correspond to - /// columns. Variables that aren't associated with any Value are set to - /// None. - SmallVector, 8> values; }; /// A FlatAffineRelation represents a set of ordered pairs (domain -> range) @@ -570,6 +181,13 @@ class FlatAffineRelation : public FlatAffineValueConstraints { : FlatAffineValueConstraints(fac), numDomainDims(numDomainDims), numRangeDims(numRangeDims) {} + /// Return the kind of this object. + Kind getKind() const override { return Kind::FlatAffineRelation; } + + static bool classof(const IntegerRelation *cst) { + return cst->getKind() == Kind::FlatAffineRelation; + } + /// Returns a set corresponding to the domain/range of the affine relation. FlatAffineValueConstraints getDomainSet() const; FlatAffineValueConstraints getRangeSet() const; @@ -616,66 +234,6 @@ class FlatAffineRelation : public FlatAffineValueConstraints { unsigned numRangeDims; }; -/// Flattens 'expr' into 'flattenedExpr', which contains the coefficients of the -/// dimensions, symbols, and additional variables that represent floor divisions -/// of dimensions, symbols, and in turn other floor divisions. Returns failure -/// if 'expr' could not be flattened (i.e., semi-affine is not yet handled). -/// 'cst' contains constraints that connect newly introduced local variables -/// to existing dimensional and symbolic variables. See documentation for -/// AffineExprFlattener on how mod's and div's are flattened. -LogicalResult getFlattenedAffineExpr(AffineExpr expr, unsigned numDims, - unsigned numSymbols, - SmallVectorImpl *flattenedExpr, - FlatAffineValueConstraints *cst = nullptr); - -/// Flattens the result expressions of the map to their corresponding flattened -/// forms and set in 'flattenedExprs'. Returns failure if any expression in the -/// map could not be flattened (i.e., semi-affine is not yet handled). 'cst' -/// contains constraints that connect newly introduced local variables to -/// existing dimensional and / symbolic variables. See documentation for -/// AffineExprFlattener on how mod's and div's are flattened. For all affine -/// expressions that share the same operands (like those of an affine map), this -/// method should be used instead of repeatedly calling getFlattenedAffineExpr -/// since local variables added to deal with div's and mod's will be reused -/// across expressions. -LogicalResult -getFlattenedAffineExprs(AffineMap map, - std::vector> *flattenedExprs, - FlatAffineValueConstraints *cst = nullptr); -LogicalResult -getFlattenedAffineExprs(IntegerSet set, - std::vector> *flattenedExprs, - FlatAffineValueConstraints *cst = nullptr); - -LogicalResult -getMultiAffineFunctionFromMap(AffineMap map, - presburger::MultiAffineFunction &multiAff); - -/// Re-indexes the dimensions and symbols of an affine map with given `operands` -/// values to align with `dims` and `syms` values. -/// -/// Each dimension/symbol of the map, bound to an operand `o`, is replaced with -/// dimension `i`, where `i` is the position of `o` within `dims`. If `o` is not -/// in `dims`, replace it with symbol `i`, where `i` is the position of `o` -/// within `syms`. If `o` is not in `syms` either, replace it with a new symbol. 
-/// -/// Note: If a value appears multiple times as a dimension/symbol (or both), all -/// corresponding dim/sym expressions are replaced with the first dimension -/// bound to that value (or first symbol if no such dimension exists). -/// -/// The resulting affine map has `dims.size()` many dimensions and at least -/// `syms.size()` many symbols. -/// -/// The SSA values of the symbols of the resulting map are optionally returned -/// via `newSyms`. This is a concatenation of `syms` with the SSA values of the -/// newly added symbols. -/// -/// Note: As part of this re-indexing, dimensions may turn into symbols, or vice -/// versa. -AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, - ValueRange dims, ValueRange syms, - SmallVector *newSyms = nullptr); - /// Builds a relation from the given AffineMap/AffineValueMap `map`, containing /// all pairs of the form `operands -> result` that satisfy `map`. `rel` is set /// to the relation built. For example, give the AffineMap: @@ -696,6 +254,6 @@ LogicalResult getRelationFromMap(AffineMap &map, FlatAffineRelation &rel); LogicalResult getRelationFromMap(const AffineValueMap &map, FlatAffineRelation &rel); -} // namespace mlir. +} // namespace mlir #endif // MLIR_DIALECT_AFFINE_ANALYSIS_AFFINESTRUCTURES_H diff --git a/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h b/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h index 3fac9409bf381..42156ac5de24d 100644 --- a/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h +++ b/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h @@ -13,6 +13,7 @@ #include "mlir/Interfaces/ViewLikeInterface.h" namespace mlir { +class RewriterBase; /// Fills the `combinedOffsets`, `combinedSizes` and `combinedStrides` to use /// when combining a producer slice **into** a consumer slice. @@ -21,6 +22,7 @@ namespace mlir { /// - Combined offsets = producer_offsets * consumer_strides + consumer_offsets /// - Combined sizes = consumer_sizes /// - Combined strides = producer_strides * consumer_strides +// TODO: unify this API with resolveSourceIndicesOffsetsAndStrides or deprecate. LogicalResult mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, ArrayRef producerOffsets, @@ -36,6 +38,7 @@ mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, /// Fills the `combinedOffsets`, `combinedSizes` and `combinedStrides` to use /// when combining a `producer` slice op **into** a `consumer` slice op. +// TODO: unify this API with resolveSourceIndicesOffsetsAndStrides or deprecate. LogicalResult mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, OffsetSizeAndStrideOpInterface producer, @@ -45,6 +48,30 @@ mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, SmallVector &combinedSizes, SmallVector &combinedStrides); +/// Given the 'indicesVals' of a load/store operation operating on an op with +/// offsets and strides, return the combined indices. +/// +/// For example, using `memref.load` and `memref.subview` as an illustration: +/// +/// ``` +/// %0 = ... 
: memref<12x42xf32> +/// %1 = memref.subview %0[%arg0, %arg1][...][%stride1, %stride2] : +/// memref<12x42xf32> to memref<4x4xf32, offset=?, strides=[?, ?]> +/// %2 = load %1[%i1, %i2] : memref<4x4xf32, offset=?, strides=[?, ?]> +/// ``` +/// +/// could be folded into: +/// +/// ``` +/// %2 = load %0[%arg0 + %i1 * %stride1][%arg1 + %i2 * %stride2] : +/// memref<12x42xf32> +/// ``` +void resolveSourceIndicesOffsetsAndStrides( + RewriterBase &rewriter, Location loc, ArrayRef mixedOffsets, + ArrayRef mixedStrides, + const llvm::SmallBitVector &rankReducedDims, ValueRange indicesVals, + SmallVectorImpl &sourceIndices); + } // namespace mlir #endif // MLIR_DIALECT_AFFINE_VIEWLIKEINTERFACEUTILS_H diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h b/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h index ea0ab14e9b8f1..5dbbfedcc70ee 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h +++ b/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h @@ -16,7 +16,7 @@ namespace mlir::arith { /// two halves and thus turning into supported ones, i.e., i2*N --> iN, where N /// is the widest integer bitwidth supported by the target. /// Currently, we only handle power-of-two integer types and support conversions -/// of integers twice as wide as the maxium supported by the target. Wide +/// of integers twice as wide as the maximum supported by the target. Wide /// integers are represented as vectors, e.g., i64 --> vector<2xi32>, where the /// first element is the low half of the original integer, and the second /// element the high half. diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 1bbc32f3d2917..3abe1614f66fa 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1652,7 +1652,7 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>, Unlike LLVM IR, MLIR does not have first-class undefined values. Such values must be created as SSA values using `llvm.mlir.undef`. This operation has no operands or attributes. It creates an undefined value of the specified LLVM - IR dialect type wrapping an LLVM IR structure type. + IR dialect type. Example: @@ -1666,6 +1666,28 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>, let assemblyFormat = "attr-dict `:` type($res)"; } +def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure]>, + LLVM_Builder<"$res = llvm::PoisonValue::get($_resultType);"> { + let summary = "Creates a poison value of LLVM dialect type."; + let description = [{ + Unlike LLVM IR, MLIR does not have first-class poison values. Such values + must be created as SSA values using `llvm.mlir.poison`. This operation has + no operands or attributes. It creates a poison value of the specified LLVM + IR dialect type. + + Example: + + ```mlir + // Create a poison value for a structure with a 32-bit integer followed + // by a float. 
+ %0 = llvm.mlir.poison : !llvm.struct<(i32, f32)> + ``` + }]; + let results = (outs LLVM_Type:$res); + let builders = [LLVM_OneResultOpBuilder]; + let assemblyFormat = "attr-dict `:` type($res)"; +} + def LLVM_ConstantOp : LLVM_Op<"mlir.constant", [Pure, ConstantLike]>, LLVM_Builder<[{$res = getLLVMConstant($_resultType, $value, $_location, diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 66d6dcc7b27ed..721615fdd2607 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -858,6 +858,10 @@ def Tensor_InsertSliceOp : Tensor_OpWithOffsetSizesAndStrides<"insert_slice", [ return {rank, rank, rank}; } + /// Return the dimensions of the dest that are omitted to insert a source + /// when the result is rank-extended. + llvm::SmallBitVector getDroppedDims(); + /// Return the number of leading operands before the `offsets`, `sizes` and /// and `strides` operands. static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; } diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h index df695dbec19a7..48f9066934a25 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h @@ -12,23 +12,27 @@ #include "mlir/Pass/Pass.h" namespace mlir { +namespace tensor { -#define GEN_PASS_DECL -#include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" +//===----------------------------------------------------------------------===// +// Passes +//===----------------------------------------------------------------------===// -/// Creates an instance of `tensor` dialect bufferization pass. +/// Creates an instance of the `tensor` subset folding pass. +std::unique_ptr createFoldTensorSubsetOpsPass(); + +/// Creates an instance of the `tensor` dialect bufferization pass. std::unique_ptr createTensorBufferizePass(); //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// -namespace tensor { /// Generate the code for registering passes. #define GEN_PASS_REGISTRATION #include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" -} // namespace tensor +} // namespace tensor } // namespace mlir #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td index 2bf774d404bf5..b4673599a5def 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td @@ -11,9 +11,25 @@ include "mlir/Pass/PassBase.td" +def FoldTensorSubsetOps : Pass<"fold-tensor-subset-ops"> { + let summary = "Fold tensor subset ops into producer/consumer ops"; + let description = [{ + The pass folds tensor subset ops into producer/consumer ops. 
+ + At the moment, the following foldings occur when possible: + - tensor.extract_slice into vector.transfer_read + - vector.transfer_write into tensor.insert_slice + + }]; + let constructor = "mlir::tensor::createFoldTensorSubsetOpsPass()"; + let dependentDialects = [ + "AffineDialect", "tensor::TensorDialect", "vector::VectorDialect" + ]; +} + def TensorBufferize : Pass<"tensor-bufferize", "func::FuncOp"> { let summary = "Bufferize the `tensor` dialect"; - let constructor = "mlir::createTensorBufferizePass()"; + let constructor = "mlir::tensor::createTensorBufferizePass()"; } #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h index 4cdf360c51d72..c0c46e9981dfa 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h @@ -18,11 +18,9 @@ struct TilingResult; namespace tensor { -/// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op -/// to separate the cases where we don't need padding (all pad sizes are -/// actually zeros) and where we indeed need padding. -void populateSplitPaddingPatterns(RewritePatternSet &patterns, - PatternBenefit baseBenefit = 1); +//===----------------------------------------------------------------------===// +// Patterns +//===----------------------------------------------------------------------===// /// Pattern to swap an `tensor.extract_slice` with its producer when the /// producer implements the `TilingInterface`. The pattern itself does not @@ -32,6 +30,23 @@ void populateSplitPaddingPatterns(RewritePatternSet &patterns, FailureOr replaceExtractSliceWithTiledProducer( OpBuilder &builder, tensor::ExtractSliceOp sliceOp, OpResult producerOp); +//===----------------------------------------------------------------------===// +// Populate functions. +//===----------------------------------------------------------------------===// + +/// Collects a set of patterns to rewrite ops within the tensor dialect. +void populateExpandOpsPatterns(RewritePatternSet &patterns); + +/// Appends patterns for folding tensor aliasing ops into consumer load/store +/// ops into `patterns`. +void populateFoldTensorSubsetOpPatterns(RewritePatternSet &patterns); + +/// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op +/// to separate the cases where we don't need padding (all pad sizes are +/// actually zeros) and where we indeed need padding. +void populateSplitPaddingPatterns(RewritePatternSet &patterns, + PatternBenefit baseBenefit = 1); + /// Collects patterns to merge consecutive tensor.insert_slice/extract_slice /// into one. 
These patterns are in in this separate entry point because the /// bufferization is sensitive over IR structure, particularly those diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 7c8018ad64606..b6127f1ffa3cf 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -1556,6 +1556,7 @@ def Tosa_SliceOp: Tosa_Op<"slice", [ Tosa_Tensor1Dto6D:$output ); + let hasCanonicalizer = 1; let hasFolder = 1; } diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h index 56f8b4bf22d21..4763b6525b934 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h @@ -110,43 +110,11 @@ void populateFlattenVectorTransferPatterns(RewritePatternSet &patterns, void populateBubbleVectorBitCastOpPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); -/// Collect a set of transfer read/write lowering patterns. -/// -/// These patterns lower transfer ops to simpler ops like `vector.load`, -/// `vector.store` and `vector.broadcast`. Only transfers with a transfer rank -/// of a most `maxTransferRank` are lowered. This is useful when combined with -/// VectorToSCF, which reduces the rank of vector transfer ops. -void populateVectorTransferLoweringPatterns( - RewritePatternSet &patterns, - std::optional maxTransferRank = std::nullopt, - PatternBenefit benefit = 1); - /// These patterns materialize masks for various vector ops such as transfers. void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns, bool force32BitVectorIndices, PatternBenefit benefit = 1); -/// Collects patterns to progressively lower vector.broadcast ops on high-D -/// vectors to low-D vector ops. -void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -/// Collects patterns to progressively lower vector mask ops into elementary -/// selection and insertion ops. -void populateVectorMaskOpLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -/// Collects patterns to progressively lower vector.shape_cast ops on high-D -/// vectors into 1-D/2-D vector ops by generating data movement extract/insert -/// ops. -void populateVectorShapeCastLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -/// Collects patterns that lower scalar vector transfer ops to memref loads and -/// stores when beneficial. -void populateScalarVectorTransferLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - /// Returns the integer type required for subscripts in the vector dialect. IntegerType getVectorSubscriptType(Builder &builder); @@ -214,8 +182,8 @@ void createMaskOpRegion(OpBuilder &builder, Operation *maskableOp); /// Creates a vector.mask operation around a maskable operation. Returns the /// vector.mask operation if the mask provided is valid. Otherwise, returns the /// maskable operation itself. -Operation *maskOperation(OpBuilder &builder, Operation *maskableOp, - Value mask, Value passthru = Value()); +Operation *maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, + Value passthru = Value()); /// Creates a vector select operation that picks values from `newValue` or /// `passthru` for each result vector lane based on `mask`. 
This utility is used diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h new file mode 100644 index 0000000000000..dfadffba3883b --- /dev/null +++ b/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h @@ -0,0 +1,248 @@ +//===- LoweringPatterns.h - Vector rewrite patterns --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_VECTOR_TRANSFORMS_LOWERINGPATTERNS_H +#define MLIR_DIALECT_VECTOR_TRANSFORMS_LOWERINGPATTERNS_H + +#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" + +namespace mlir { +class RewritePatternSet; + +namespace vector { + +//===----------------------------------------------------------------------===// +// Lowering pattern populate functions +//===----------------------------------------------------------------------===// + +/// Populate the pattern set with the following patterns: +/// +/// [OuterProductOpLowering] +/// Progressively lower a `vector.outerproduct` to linearized +/// `vector.extract` + `vector.fma` + `vector.insert`. +/// +/// [ContractionOpLowering] +/// Progressive lowering of ContractionOp. +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// +/// [ContractionOpToMatmulOpLowering] +/// Progressively lower a `vector.contract` with row-major matmul semantics to +/// linearized `vector.shape_cast` + `vector.matmul` on the way to +/// `llvm.matrix.multiply`. +/// +/// [ContractionOpToDotLowering] +/// Progressively lower a `vector.contract` with row-major matmul semantics to +/// linearized `vector.extract` + `vector.reduce` + `vector.insert`. +/// +/// [ContractionOpToOuterProductOpLowering] +/// Progressively lower a `vector.contract` with row-major matmul semantics to +/// linearized `vector.extract` + `vector.outerproduct` + `vector.insert`. +void populateVectorContractLoweringPatterns( + RewritePatternSet &patterns, VectorTransformsOptions options, + PatternBenefit benefit = 1, bool disableOuterProductLowering = false); + +/// Collect a set of patterns to convert vector.multi_reduction op into +/// a sequence of vector.reduction ops. The patterns comprise: +/// +/// [InnerOuterDimReductionConversion] +/// Rewrites vector.multi_reduction such that all reduction dimensions are +/// either innermost or outermost, by adding the proper vector.transpose +/// operations. +/// +/// [ReduceMultiDimReductionRank] +/// Once in innermost or outermost reduction +/// form, rewrites n-D vector.multi_reduction into 2-D vector.multi_reduction, +/// by introducing vector.shape_cast ops to collapse + multi-reduce + expand +/// back. +/// +/// [TwoDimMultiReductionToElementWise] +/// Once in 2-D vector.multi_reduction form, with an **outermost** reduction +/// dimension, unroll the outer dimension to obtain a sequence of 1-D vector +/// ops. This also has an opportunity for tree-reduction (in the future). 
+/// +/// [TwoDimMultiReductionToReduction] +/// Once in 2-D vector.multi_reduction form, with an **innermost** reduction +/// dimension, unroll the outer dimension to obtain a sequence of extract + +/// vector.reduction + insert. This can further lower to horizontal reduction +/// ops. +/// +/// [OneDimMultiReductionToTwoDim] +/// For cases that reduce to 1-D vector reduction (and are thus missing +/// either a parallel or a reduction), we lift them back up to 2-D with a simple +/// vector.shape_cast to vector<1xk> so that the other patterns can kick in, +/// thus fully exiting out of the vector.multi_reduction abstraction. +void populateVectorMultiReductionLoweringPatterns( + RewritePatternSet &patterns, VectorMultiReductionLowering options, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [BroadcastOpLowering] +/// Progressive lowering of BroadcastOp to ExtractOp + InsertOp + lower-D +/// BroadcastOp until dim 1. +void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [CreateMaskOp] +/// Progressive lowering of CreateMaskOp to lower-D CreateMaskOp until dim 1. +/// +/// [ConstantMaskOp] +/// Progressive lowering of ConstantMaskOp to lower-D ConstantMaskOp until +/// dim 1. +void populateVectorMaskOpLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Collects patterns that lower scalar vector transfer ops to memref loads and +/// stores when beneficial. +void populateScalarVectorTransferLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [ShapeCastOp2DDownCastRewritePattern] +/// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D +/// vectors progressively. +/// +/// [ShapeCastOp2DUpCastRewritePattern] +/// ShapeOp 1D -> 2D upcast serves the purpose of unflattening 2-D from 1-D +/// vectors progressively. +/// +/// [ShapeCastOpRewritePattern] +/// Reference lowering to fully unrolled sequences of single element ExtractOp + +/// InsertOp. Note that applying this pattern can almost always be considered a +/// performance bug. +void populateVectorShapeCastLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [TransposeOpLowering] +/// +/// [TransposeOp2DToShuffleLowering] +/// +void populateVectorTransposeLoweringPatterns(RewritePatternSet &patterns, + VectorTransformsOptions options, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [TransferReadToVectorLoadLowering] +/// Progressive lowering of transfer_read. This pattern supports lowering of +/// `vector.transfer_read` to a combination of `vector.load` and +/// `vector.broadcast` +/// +/// [TransferWriteToVectorStoreLowering] +/// Progressive lowering of transfer_write. This pattern supports lowering of +/// `vector.transfer_write` to `vector.store` +/// +/// [VectorLoadToMemrefLoadLowering] +/// Replace a 0-d vector.load with a memref.load + vector.broadcast. +/// +/// [VectorStoreToMemrefStoreLowering] +/// Replace a 0-d vector.store with a vector.extractelement + memref.store. +/// +/// These patterns lower transfer ops to simpler ops like `vector.load`, +/// `vector.store` and `vector.broadcast`. Only transfers with a transfer rank +/// of at most `maxTransferRank` are lowered. This is useful when combined with +/// VectorToSCF, which reduces the rank of vector transfer ops. +void populateVectorTransferLoweringPatterns( + RewritePatternSet &patterns, + std::optional maxTransferRank = std::nullopt, + PatternBenefit benefit = 1); + +/// Collect a set of transfer read/write lowering patterns that simplify the +/// permutation map (e.g., converting it to a minor identity map) by inserting +/// broadcasts and transposes. More specifically: +/// +/// [TransferReadPermutationLowering] +/// Lower transfer_read op with permutation into a transfer_read with a +/// permutation map composed of leading zeros followed by a minor identity + +/// vector.transpose op. +/// Ex: +/// vector.transfer_read ... +/// permutation_map: (d0, d1, d2) -> (0, d1) +/// into: +/// %v = vector.transfer_read ... +/// permutation_map: (d0, d1, d2) -> (d1, 0) +/// vector.transpose %v, [1, 0] +/// +/// vector.transfer_read ... +/// permutation_map: (d0, d1, d2, d3) -> (0, 0, 0, d1, d3) +/// into: +/// %v = vector.transfer_read ... +/// permutation_map: (d0, d1, d2, d3) -> (0, 0, d1, 0, d3) +/// vector.transpose %v, [0, 1, 3, 2, 4] +/// Note that an alternative is to transform it to linalg.transpose + +/// vector.transfer_read to do the transpose in memory instead. +/// +/// [TransferWritePermutationLowering] +/// Lower transfer_write op with permutation into a transfer_write with a +/// minor identity permutation map. (transfer_write ops cannot have broadcasts.) +/// Ex: +/// vector.transfer_write %v ... +/// permutation_map: (d0, d1, d2) -> (d2, d0, d1) +/// into: +/// %tmp = vector.transpose %v, [2, 0, 1] +/// vector.transfer_write %tmp ... +/// permutation_map: (d0, d1, d2) -> (d0, d1, d2) +/// +/// vector.transfer_write %v ... +/// permutation_map: (d0, d1, d2, d3) -> (d3, d2) +/// into: +/// %tmp = vector.transpose %v, [1, 0] +/// %v = vector.transfer_write %tmp ... +/// permutation_map: (d0, d1, d2, d3) -> (d2, d3) +/// +/// [TransferOpReduceRank] +/// Lower transfer_read op with broadcast in the leading dimensions into +/// transfer_read of lower rank + vector.broadcast. +/// Ex: vector.transfer_read ... +/// permutation_map: (d0, d1, d2, d3) -> (0, d1, 0, d3) +/// into: +/// %v = vector.transfer_read ... +/// permutation_map: (d0, d1, d2, d3) -> (d1, 0, d3) +/// vector.broadcast %v +void populateVectorTransferPermutationMapLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [ScanToArithOps] +/// Convert vector.scan op into arith ops and vector.insert_strided_slice / +/// vector.extract_strided_slice. +void populateVectorScanLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [FlattenGather] +/// Flattens 2 or more dimensional `vector.gather` ops by unrolling the +/// outermost dimension. For example: +/// +/// [Gather1DToConditionalLoads] +/// Turns 1-d `vector.gather` into a scalarized sequence of `vector.loads` or +/// `tensor.extract`s. To avoid out-of-bounds memory accesses, these +/// loads/extracts are made conditional using `scf.if` ops. +void populateVectorGatherLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Populates instances of `MaskOpRewritePattern` to lower masked operations +/// with `vector.mask`.
Patterns should rewrite the `vector.mask` operation and +/// not its nested `MaskableOpInterface`. +void populateVectorMaskLoweringPatternsForSideEffectingOps( + RewritePatternSet &patterns); + +} // namespace vector +} // namespace mlir +#endif // MLIR_DIALECT_VECTOR_TRANSFORMS_LOWERINGPATTERNS_H diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h index d0c06f69930d2..bf89b01e2b60c 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h @@ -22,12 +22,6 @@ std::unique_ptr createVectorBufferizePass(); /// Creates an instance of the `vector.mask` lowering pass. std::unique_ptr createLowerVectorMaskPass(); -/// Populates instances of `MaskOpRewritePattern` to lower masked operations -/// with `vector.mask`. Patterns should rewrite the `vector.mask` operation and -/// not its nested `MaskableOpInterface`. -void populateVectorMaskLoweringPatternsForSideEffectingOps( - RewritePatternSet &patterns); - //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h index af68de7e0051e..a79bbd0be0975 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h @@ -9,8 +9,8 @@ #ifndef MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H #define MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H -#include #include +#include #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Transforms/VectorTransformsEnums.h.inc" @@ -23,42 +23,7 @@ namespace mlir { class RewritePatternSet; namespace vector { - -//===----------------------------------------------------------------------===// -// Vector transformation options exposed as auxiliary structs. -//===----------------------------------------------------------------------===// -/// Structure to control the behavior of vector transform patterns. -struct VectorTransformsOptions { - /// Option to control the lowering of vector.contract. - VectorContractLowering vectorContractLowering = VectorContractLowering::Dot; - VectorTransformsOptions & - setVectorTransformsOptions(VectorContractLowering opt) { - vectorContractLowering = opt; - return *this; - } - /// Option to control the lowering of vector.multi_reduction. - VectorMultiReductionLowering vectorMultiReductionLowering = - VectorMultiReductionLowering::InnerParallel; - VectorTransformsOptions & - setVectorMultiReductionLowering(VectorMultiReductionLowering opt) { - vectorMultiReductionLowering = opt; - return *this; - } - /// Option to control the lowering of vector.transpose. - VectorTransposeLowering vectorTransposeLowering = - VectorTransposeLowering::EltWise; - VectorTransformsOptions & - setVectorTransposeLowering(VectorTransposeLowering opt) { - vectorTransposeLowering = opt; - return *this; - } - /// Option to control the splitting of vector transfers. - VectorTransferSplit vectorTransferSplit = VectorTransferSplit::None; - VectorTransformsOptions &setVectorTransferSplit(VectorTransferSplit opt) { - vectorTransferSplit = opt; - return *this; - } -}; +struct VectorTransformsOptions; /// Options that control the vector unrolling. 
struct UnrollVectorOptions { @@ -109,45 +74,6 @@ struct UnrollVectorOptions { // Vector transformation exposed as populate functions over rewrite patterns. //===----------------------------------------------------------------------===// -/// Insert TransposeLowering patterns into extraction/insertion. -void populateVectorTransposeLoweringPatterns( - RewritePatternSet &patterns, - VectorTransformsOptions options = VectorTransformsOptions(), - PatternBenefit benefit = 1); - -/// Collect a set of patterns to convert vector.multi_reduction op into -/// a sequence of vector.reduction ops. The patterns comprise: -/// - InnerOuterDimReductionConversion: rewrites vector.multi_reduction such -/// that all reduction dimensions are either innermost or outermost, by adding -/// the proper vector.transpose operations. -/// - ReduceMultiDimReductionRank: once in innermost or outermost reduction -/// form, rewrites n-D vector.multi_reduction into 2-D vector.multi_reduction, -/// by introducing vector.shape_cast ops to collapse + multi-reduce + expand -/// back. -/// - TwoDimMultiReductionToElementWise: once in 2-D vector.multi_reduction -/// form, with an **outermost** reduction dimension, unroll the outer dimension -/// to obtain a sequence of 1-D vector ops. This also has an opportunity for -/// tree-reduction (in the future). -/// - TwoDimMultiReductionToReduction: once in 2-D vector.multi_reduction form, -/// with an **innermost** reduction dimension, unroll the outer dimension to -/// obtain a sequence of extract + vector.reduction + insert. This can further -/// lower to horizontal reduction ops. -/// - OneDimMultiReductionToTwoDim: for cases that reduce to 1-D vector -/// reduction (and are thus missing either a parallel or a reduction), we lift -/// them back up to 2-D with a simple vector.shape_cast to vector<1xk> so that -/// the other patterns can kick in, thus fully exiting out of the -/// vector.multi_reduction abstraction. -void populateVectorMultiReductionLoweringPatterns( - RewritePatternSet &patterns, VectorMultiReductionLowering options, - PatternBenefit benefit = 1); - -/// Collects patterns to progressively lower vector contraction ops on high-D -/// into low-D reduction and product ops. -void populateVectorContractLoweringPatterns( - RewritePatternSet &patterns, - VectorTransformsOptions options = VectorTransformsOptions(), - PatternBenefit benefit = 1); - /// Canonicalization of a `vector.contraction %a, %b, %c` with row-major matmul /// semantics to a contraction with MMT semantics (matrix matrix multiplication /// with the RHS transposed). This specific form is meant to have the vector @@ -174,67 +100,43 @@ void populateVectorContractCanonicalizeMatmulToMMT( void populateVectorReductionToContractPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); -/// Collect patterns to convert scan op -void populateVectorScanLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -//===----------------------------------------------------------------------===// -// Vector.transfer patterns. -//===----------------------------------------------------------------------===// -/// Collect a set of transfer read/write lowering patterns that simplify the -/// permutation map (e.g., converting it to a minor identity map) by inserting -/// broadcasts and transposes. 
More specifically: -/// -/// [TransferReadPermutationLowering] -/// Lower transfer_read op with permutation into a transfer_read with a -/// permutation map composed of leading zeros followed by a minor identity + -/// vector.transpose op. -/// Ex: -/// vector.transfer_read ... -/// permutation_map: (d0, d1, d2) -> (0, d1) -/// into: -/// %v = vector.transfer_read ... -/// permutation_map: (d0, d1, d2) -> (d1, 0) -/// vector.transpose %v, [1, 0] +/// Populate `patterns` with the following patterns. /// -/// vector.transfer_read ... -/// permutation_map: (d0, d1, d2, d3) -> (0, 0, 0, d1, d3) -/// into: -/// %v = vector.transfer_read ... -/// permutation_map: (d0, d1, d2, d3) -> (0, 0, d1, 0, d3) -/// vector.transpose %v, [0, 1, 3, 2, 4] -/// Note that an alternative is to transform it to linalg.transpose + -/// vector.transfer_read to do the transpose in memory instead. +/// - VectorTransferFullPartialRewriter /// -/// [TransferWritePermutationLowering] -/// Lower transfer_write op with permutation into a transfer_write with a -/// minor identity permutation map. (transfer_write ops cannot have broadcasts.) -/// Ex: -/// vector.transfer_write %v ... -/// permutation_map: (d0, d1, d2) -> (d2, d0, d1) -/// into: -/// %tmp = vector.transpose %v, [2, 0, 1] -/// vector.transfer_write %tmp ... -/// permutation_map: (d0, d1, d2) -> (d0, d1, d2) +/// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds +/// masking) fast path and a slow path. /// -/// vector.transfer_write %v ... -/// permutation_map: (d0, d1, d2, d3) -> (d3, d2) -/// into: -/// %tmp = vector.transpose %v, [1, 0] -/// %v = vector.transfer_write %tmp ... -/// permutation_map: (d0, d1, d2, d3) -> (d2, d3) +/// Example (a 2-D vector.transfer_read): +/// ``` +/// %1 = vector.transfer_read %0[...], %pad : memref, vector<...> +/// ``` +/// is transformed into: +/// ``` +/// %1:3 = scf.if (%inBounds) { +/// // fast path, direct cast +/// memref.cast %A: memref to compatibleMemRefType +/// scf.yield %view : compatibleMemRefType, index, index +/// } else { +/// // slow path, not in-bounds vector.transfer or linalg.copy. +/// memref.cast %alloc: memref to compatibleMemRefType +/// scf.yield %4 : compatibleMemRefType, index, index +// } +/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ... true]} +/// ``` +/// where `alloc` is a top of the function alloca'ed buffer of one vector. /// -/// [TransferOpReduceRank] -/// Lower transfer_read op with broadcast in the leading dimensions into -/// transfer_read of lower rank + vector.broadcast. -/// Ex: vector.transfer_read ... -/// permutation_map: (d0, d1, d2, d3) -> (0, d1, 0, d3) -/// into: -/// %v = vector.transfer_read ... -/// permutation_map: (d0, d1, d2, d3) -> (d1, 0, d3) -/// vector.broadcast %v -void populateVectorTransferPermutationMapLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit = 1); +/// Preconditions: +/// 1. `xferOp.permutation_map()` must be a minor identity map +/// 2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()` +/// must be equal. This will be relaxed in the future but requires +/// rank-reducing subviews. +void populateVectorTransferFullPartialPatterns( + RewritePatternSet &patterns, const VectorTransformsOptions &options); + +//===----------------------------------------------------------------------===// +// Vector.transfer patterns. 
+//===----------------------------------------------------------------------===// /// Collect a set of patterns to reduce the rank of the operands of vector /// transfer ops to operate on the largest contigious vector. @@ -334,220 +236,6 @@ void populateVectorUnrollPatterns(RewritePatternSet &patterns, const UnrollVectorOptions &options, PatternBenefit benefit = 1); -/// Expands `vector.gather` ops into a series of conditional scalar loads -/// (`vector.load` for memrefs or `tensor.extract` for tensors). These loads are -/// conditional to avoid out-of-bounds memory accesses and guarded with `scf.if` -/// ops. This lowering path is intended for targets that do not feature -/// dedicated gather ops. -void populateVectorGatherLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -//===----------------------------------------------------------------------===// -// Finer-grained patterns exposed for more control over individual lowerings. -//===----------------------------------------------------------------------===// -/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern -/// may take an extra filter to perform selection at a finer granularity. -struct VectorTransferFullPartialRewriter : public RewritePattern { - using FilterConstraintType = - std::function; - - explicit VectorTransferFullPartialRewriter( - MLIRContext *context, - VectorTransformsOptions options = VectorTransformsOptions(), - FilterConstraintType filter = - [](VectorTransferOpInterface op) { return success(); }, - PatternBenefit benefit = 1) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), options(options), - filter(std::move(filter)) {} - - /// Performs the rewrite. - LogicalResult matchAndRewrite(Operation *op, - PatternRewriter &rewriter) const override; - -private: - VectorTransformsOptions options; - FilterConstraintType filter; -}; - -/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to: -/// ``` -/// %flattened_a = vector.shape_cast %a -/// %flattened_b = vector.shape_cast %b -/// %flattened_d = vector.matmul %flattened_a, %flattened_b -/// %d = vector.shape_cast %%flattened_d -/// %e = add %c, %d -/// ``` -/// `vector.matmul` later lowers to `llvm.matrix.multiply`. -// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct and -/// the vector.contract op is a row-major matrix multiply. -class ContractionOpToMatmulOpLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpToMatmulOpLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - FilterConstraintType constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), - filter(std::move(constraint)) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. 
- vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to a reduction_size-unrolled sequence: -/// ``` -/// %at = vector.transpose %a, [1, 0] -/// %bRow0 = vector.extract %b[0] -/// %atRow0 = vector.extract %at[0] -/// %c0 = vector.outerproduct %atRow0, %bRow0, %c -/// ... -/// %bRowK = vector.extract %b[K] -/// %atRowK = vector.extract %at[K] -/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 -/// ``` -/// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct and -/// the vector.contract op is a row-major matrix multiply. -class ContractionOpToOuterProductOpLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpToOuterProductOpLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - FilterConstraintType constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), - filter(std::move(constraint)) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to an output-size-unrolled sequence: -/// ``` -/// %out = arith.constant ... : vector -/// %bt = vector.transpose %b, [1, 0] -/// %aRow0 = vector.extract %a[0] -/// %btRow0 = vector.extract %bt[0] -/// %c00 = vector.reduce %atRow0, %bRow0 -/// %out00 = vector.insert %c00, %out[0, 0] -/// ... -/// %aRowLast = vector.extract %at[M-1] -/// %btRowLast = vector.extract %b[N-1] -/// %cLastLast = vector.reduce %atRowLast, %bRowLast -/// %outcLastLast = vector.insert %cLastLast, %out[M-1, N-1] -/// ``` -/// -/// This only kicks in when VectorTransformsOptions is set to Dot and -/// the vector.contract op is a row-major matmul or matvec. -class ContractionOpToDotLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpToDotLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - const FilterConstraintType &constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of ContractionOp. -/// -/// One: -/// %x = vector.contract with at least one free/batch dimension -/// is replaced by: -/// %a = vector.contract with one less free/batch dimension -/// %b = vector.contract with one less free/batch dimension -/// .. -/// %x = combine %a %b .. -/// until a pure contraction is reached (no free/batch dimensions), -/// which is replaced by a dot-product. 
-/// -/// This only kicks in when either VectorTransformsOptions is set -/// to Dot or when other contraction patterns fail. -class ContractionOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpLowering(vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - FilterConstraintType constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), - filter(std::move(constraint)) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; - // Lower one parallel dimension. - FailureOr lowerParallel(PatternRewriter &rewriter, - vector::ContractionOp op, int64_t lhsIndex, - int64_t rhsIndex, Value mask) const; - // Lower one reduction dimension. - FailureOr lowerReduction(PatternRewriter &rewriter, - vector::ContractionOp op, Value mask) const; -}; - } // namespace vector } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h index 947911f9a3841..52a4c9cc368d8 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h @@ -24,17 +24,53 @@ class IfOp; namespace vector { +//===----------------------------------------------------------------------===// +// Vector transformation options exposed as auxiliary structs. +//===----------------------------------------------------------------------===// +/// Structure to control the behavior of vector transform patterns. +struct VectorTransformsOptions { + /// Option to control the lowering of vector.contract. + VectorContractLowering vectorContractLowering = VectorContractLowering::Dot; + VectorTransformsOptions & + setVectorTransformsOptions(VectorContractLowering opt) { + vectorContractLowering = opt; + return *this; + } + /// Option to control the lowering of vector.multi_reduction. + VectorMultiReductionLowering vectorMultiReductionLowering = + VectorMultiReductionLowering::InnerParallel; + VectorTransformsOptions & + setVectorMultiReductionLowering(VectorMultiReductionLowering opt) { + vectorMultiReductionLowering = opt; + return *this; + } + /// Option to control the lowering of vector.transpose. + VectorTransposeLowering vectorTransposeLowering = + VectorTransposeLowering::EltWise; + VectorTransformsOptions & + setVectorTransposeLowering(VectorTransposeLowering opt) { + vectorTransposeLowering = opt; + return *this; + } + /// Option to control the splitting of vector transfers. + VectorTransferSplit vectorTransferSplit = VectorTransferSplit::None; + VectorTransformsOptions &setVectorTransferSplit(VectorTransferSplit opt) { + vectorTransferSplit = opt; + return *this; + } +}; + //===----------------------------------------------------------------------===// // Standalone transformations and helpers. //===----------------------------------------------------------------------===// -/// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds -/// masking) fastpath and a slowpath. 
-/// If `ifOp` is not null and the result is `success, the `ifOp` points to the -/// newly created conditional upon function return. -/// To accomodate for the fact that the original vector.transfer indexing may be -/// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the -/// scf.if op returns a view and values of type index. -/// At this time, only vector.transfer_read case is implemented. +/// Split a vector.transfer operation into an in-bounds (i.e., no +/// out-of-bounds masking) fastpath and a slowpath. If `ifOp` is not null and +/// the result is `success`, the `ifOp` points to the newly created conditional +/// upon function return. To accommodate for the fact that the original +/// vector.transfer indexing may be arbitrary and the slow path indexes +/// @[0...0] in the temporary buffer, the scf.if op returns a view and values +/// of type index. At this time, only vector.transfer_read case is +/// implemented. /// /// Example (a 2-D vector.transfer_read): /// ``` @@ -51,15 +87,16 @@ namespace vector { /// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } -/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ... true]} +/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ... +/// true]} /// ``` /// where `alloc` is a top of the function alloca'ed buffer of one vector. /// /// Preconditions: /// 1. `xferOp.permutation_map()` must be a minor identity map -/// 2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()` -/// must be equal. This will be relaxed in the future but requires -/// rank-reducing subviews. +/// 2. the rank of the `xferOp.memref()` and the rank of the +/// `xferOp.vector()` must be equal. This will be relaxed in the future but +/// requires rank-reducing subviews. LogicalResult splitFullAndPartialTransfer( RewriterBase &b, VectorTransferOpInterface xferOp, VectorTransformsOptions options = VectorTransformsOptions(), diff --git a/mlir/include/mlir/IR/AffineExprVisitor.h b/mlir/include/mlir/IR/AffineExprVisitor.h index 30ee1b6e0819c..f6216614c2238 100644 --- a/mlir/include/mlir/IR/AffineExprVisitor.h +++ b/mlir/include/mlir/IR/AffineExprVisitor.h @@ -324,7 +324,7 @@ class SimpleAffineExprFlattener // A floordiv is thus flattened by introducing a new local variable q, and // replacing that expression with 'q' while adding the constraints // c * q <= expr <= c * q + c - 1 to localVarCst (done by - // FlatAffineConstraints::addLocalFloorDiv). + // IntegerRelation::addLocalFloorDiv). // // A ceildiv is similarly flattened: // t = expr ceildiv c <=> t = (expr + c - 1) floordiv c diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index cc7c794f1f933..e21dc9c950c5a 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -249,11 +249,13 @@ class AffineMap { /// Returns a new AffineMap with the same number of dims and symbols and one /// less result at `pos`, dropped. - AffineMap dropResult(int64_t pos) { return dropResults({pos}); } + AffineMap dropResult(int64_t pos) const { + return dropResults(ArrayRef({pos})); + } // Returns a new AffineMap with the same number of dims and symbols, but all - // positions in `positions` dropped from results. - AffineMap dropResults(ArrayRef positions) { + // results in `positions` dropped.
+ AffineMap dropResults(ArrayRef positions) const { SmallVector reverse_sorted_positions = llvm::to_vector(positions); llvm::sort(reverse_sorted_positions, std::greater()); @@ -263,9 +265,13 @@ class AffineMap { return AffineMap::get(getNumDims(), getNumSymbols(), exprs, getContext()); } + // Returns a new AffineMap with the same number of dims and symbols, but all + // results in `positions` dropped. + AffineMap dropResults(const llvm::SmallBitVector &positions) const; + /// Returns a new AffineMap with the same number of dims and symbols and an /// extra result inserted at `pos`. - AffineMap insertResult(AffineExpr expr, unsigned pos) { + AffineMap insertResult(AffineExpr expr, unsigned pos) const { auto exprs = llvm::to_vector<4>(getResults()); exprs.insert(exprs.begin() + pos, expr); return AffineMap::get(getNumDims(), getNumSymbols(), exprs, getContext()); @@ -583,6 +589,12 @@ llvm::SmallBitVector getUnusedDimsBitVector(ArrayRef maps); // by any of the maps in the input array `maps`. llvm::SmallBitVector getUnusedSymbolsBitVector(ArrayRef maps); +/// Expand `map` to operate on `rank` dims while projecting out the dims in +/// `projectedDimensions`. This amounts to composing `map` with +/// `id(rank).dropResults(projectedDimensions)`. +AffineMap expandDimsToRank(AffineMap map, int64_t rank, + const llvm::SmallBitVector &projectedDimensions); + inline raw_ostream &operator<<(raw_ostream &os, AffineMap map) { map.print(os); return os; diff --git a/mlir/include/mlir/IR/IntegerSet.h b/mlir/include/mlir/IR/IntegerSet.h index b8affcae74e6e..f814776f1ee7f 100644 --- a/mlir/include/mlir/IR/IntegerSet.h +++ b/mlir/include/mlir/IR/IntegerSet.h @@ -17,7 +17,7 @@ // This class is not meant for affine analysis and operations like set // operations, emptiness checks, or other math operations for analysis and -// transformation. For the latter, use FlatAffineConstraints. +// transformation. For the latter, use FlatAffineValueConstraints. // //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 25263db944e97..b68e03c5748fc 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES AliasAnalysis.cpp CallGraph.cpp DataLayoutAnalysis.cpp + FlatLinearValueConstraints.cpp Liveness.cpp SliceAnalysis.cpp @@ -14,11 +15,14 @@ set(LLVM_OPTIONAL_SOURCES DataFlow/SparseAnalysis.cpp ) +add_subdirectory(Presburger) + add_mlir_library(MLIRAnalysis AliasAnalysis.cpp CallGraph.cpp DataFlowFramework.cpp DataLayoutAnalysis.cpp + FlatLinearValueConstraints.cpp Liveness.cpp SliceAnalysis.cpp @@ -43,8 +47,8 @@ add_mlir_library(MLIRAnalysis MLIRInferIntRangeInterface MLIRInferTypeOpInterface MLIRLoopLikeInterface + MLIRPresburger MLIRSideEffectInterfaces MLIRViewLikeInterface ) -add_subdirectory(Presburger) diff --git a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp new file mode 100644 index 0000000000000..24c8d871ff97c --- /dev/null +++ b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp @@ -0,0 +1,1343 @@ +//===- FlatLinearValueConstraints.cpp - Linear Constraint -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis//FlatLinearValueConstraints.h" + +#include "mlir/Analysis/Presburger/LinearTransform.h" +#include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/IR/AffineExprVisitor.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/IntegerSet.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/MathExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +#define DEBUG_TYPE "flat-value-constraints" + +using namespace mlir; +using namespace presburger; + +//===----------------------------------------------------------------------===// +// AffineExprFlattener +//===----------------------------------------------------------------------===// + +namespace { + +// See comments for SimpleAffineExprFlattener. +// An AffineExprFlattener extends a SimpleAffineExprFlattener by recording +// constraint information associated with mod's, floordiv's, and ceildiv's +// in FlatLinearConstraints 'localVarCst'. +struct AffineExprFlattener : public SimpleAffineExprFlattener { +public: + // Constraints connecting newly introduced local variables (for mod's and + // div's) to existing (dimensional and symbolic) ones. These are always + // inequalities. + IntegerPolyhedron localVarCst; + + AffineExprFlattener(unsigned nDims, unsigned nSymbols) + : SimpleAffineExprFlattener(nDims, nSymbols), + localVarCst(PresburgerSpace::getSetSpace(nDims, nSymbols)) {} + +private: + // Add a local variable (needed to flatten a mod, floordiv, ceildiv expr). + // The local variable added is always a floordiv of a pure add/mul affine + // function of other variables, coefficients of which are specified in + // `dividend' and with respect to the positive constant `divisor'. localExpr + // is the simplified tree expression (AffineExpr) corresponding to the + // quantifier. + void addLocalFloorDivId(ArrayRef dividend, int64_t divisor, + AffineExpr localExpr) override { + SimpleAffineExprFlattener::addLocalFloorDivId(dividend, divisor, localExpr); + // Update localVarCst. + localVarCst.addLocalFloorDiv(dividend, divisor); + } +}; + +} // namespace + +// Flattens the expressions in map. Returns failure if 'expr' was unable to be +// flattened (i.e., semi-affine expressions not handled yet). +static LogicalResult +getFlattenedAffineExprs(ArrayRef exprs, unsigned numDims, + unsigned numSymbols, + std::vector> *flattenedExprs, + FlatLinearConstraints *localVarCst) { + if (exprs.empty()) { + if (localVarCst) + *localVarCst = FlatLinearConstraints(numDims, numSymbols); + return success(); + } + + AffineExprFlattener flattener(numDims, numSymbols); + // Use the same flattener to simplify each expression successively. This way + // local variables / expressions are shared. + for (auto expr : exprs) { + if (!expr.isPureAffine()) + return failure(); + + flattener.walkPostOrder(expr); + } + + assert(flattener.operandExprStack.size() == exprs.size()); + flattenedExprs->clear(); + flattenedExprs->assign(flattener.operandExprStack.begin(), + flattener.operandExprStack.end()); + + if (localVarCst) + localVarCst->clearAndCopyFrom(flattener.localVarCst); + + return success(); +} + +// Flattens 'expr' into 'flattenedExpr'. 
Returns failure if 'expr' was unable to +// be flattened (semi-affine expressions not handled yet). +LogicalResult +mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims, + unsigned numSymbols, + SmallVectorImpl *flattenedExpr, + FlatLinearConstraints *localVarCst) { + std::vector> flattenedExprs; + LogicalResult ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols, + &flattenedExprs, localVarCst); + *flattenedExpr = flattenedExprs[0]; + return ret; +} + +/// Flattens the expressions in map. Returns failure if 'expr' was unable to be +/// flattened (i.e., semi-affine expressions not handled yet). +LogicalResult mlir::getFlattenedAffineExprs( + AffineMap map, std::vector> *flattenedExprs, + FlatLinearConstraints *localVarCst) { + if (map.getNumResults() == 0) { + if (localVarCst) + *localVarCst = + FlatLinearConstraints(map.getNumDims(), map.getNumSymbols()); + return success(); + } + return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(), + map.getNumSymbols(), flattenedExprs, + localVarCst); +} + +LogicalResult mlir::getFlattenedAffineExprs( + IntegerSet set, std::vector> *flattenedExprs, + FlatLinearConstraints *localVarCst) { + if (set.getNumConstraints() == 0) { + if (localVarCst) + *localVarCst = + FlatLinearConstraints(set.getNumDims(), set.getNumSymbols()); + return success(); + } + return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(), + set.getNumSymbols(), flattenedExprs, + localVarCst); +} + +//===----------------------------------------------------------------------===// +// FlatLinearConstraints +//===----------------------------------------------------------------------===// + +std::unique_ptr FlatLinearConstraints::clone() const { + return std::make_unique(*this); +} + +// Similar to `composeMap` except that no Values need be associated with the +// constraint system nor are they looked at -- the dimensions and symbols of +// `other` are expected to correspond 1:1 to `this` system. +LogicalResult FlatLinearConstraints::composeMatchingMap(AffineMap other) { + assert(other.getNumDims() == getNumDimVars() && "dim mismatch"); + assert(other.getNumSymbols() == getNumSymbolVars() && "symbol mismatch"); + + std::vector> flatExprs; + if (failed(flattenAlignedMapAndMergeLocals(other, &flatExprs))) + return failure(); + assert(flatExprs.size() == other.getNumResults()); + + // Add dimensions corresponding to the map's results. + insertDimVar(/*pos=*/0, /*num=*/other.getNumResults()); + + // We add one equality for each result connecting the result dim of the map to + // the other variables. + // E.g.: if the expression is 16*i0 + i1, and this is the r^th + // iteration/result of the value map, we are adding the equality: + // d_r - 16*i0 - i1 = 0. Similarly, when flattening (i0 + 1, i0 + 8*i2), we + // add two equalities: d_0 - i0 - 1 == 0, d1 - i0 - 8*i2 == 0. + for (unsigned r = 0, e = flatExprs.size(); r < e; r++) { + const auto &flatExpr = flatExprs[r]; + assert(flatExpr.size() >= other.getNumInputs() + 1); + + SmallVector eqToAdd(getNumCols(), 0); + // Set the coefficient for this result to one. + eqToAdd[r] = 1; + + // Dims and symbols. + for (unsigned i = 0, f = other.getNumInputs(); i < f; i++) { + // Negate `eq[r]` since the newly added dimension will be set to this one. + eqToAdd[e + i] = -flatExpr[i]; + } + // Local columns of `eq` are at the beginning. 
+    unsigned j = getNumDimVars() + getNumSymbolVars();
+    unsigned end = flatExpr.size() - 1;
+    for (unsigned i = other.getNumInputs(); i < end; i++, j++) {
+      eqToAdd[j] = -flatExpr[i];
+    }
+
+    // Constant term.
+    eqToAdd[getNumCols() - 1] = -flatExpr[flatExpr.size() - 1];
+
+    // Add the equality connecting the result of the map to this constraint set.
+    addEquality(eqToAdd);
+  }
+
+  return success();
+}
+
+// Determine whether the variable at 'pos' (say var_r) can be expressed as
+// modulo of another known variable (say var_n) w.r.t a constant. For example,
+// if the following constraints hold true:
+// ```
+// 0 <= var_r <= divisor - 1
+// var_n - (divisor * q_expr) = var_r
+// ```
+// where `var_n` is a known variable (called dividend), and `q_expr` is an
+// `AffineExpr` (called the quotient expression), `var_r` can be written as:
+//
+// `var_r = var_n mod divisor`.
+//
+// Additionally, in a special case of the above constraints where `q_expr` is a
+// variable itself that is not yet known (say `var_q`), it can be written as a
+// floordiv in the following way:
+//
+// `var_q = var_n floordiv divisor`.
+//
+// Returns true if the above mod or floordiv are detected, updating 'memo' with
+// these new expressions. Returns false otherwise.
+static bool detectAsMod(const FlatLinearConstraints &cst, unsigned pos,
+                        int64_t lbConst, int64_t ubConst,
+                        SmallVectorImpl &memo,
+                        MLIRContext *context) {
+  assert(pos < cst.getNumVars() && "invalid position");
+
+  // Check if a divisor satisfying the condition `0 <= var_r <= divisor - 1` can
+  // be determined.
+  if (lbConst != 0 || ubConst < 1)
+    return false;
+  int64_t divisor = ubConst + 1;
+
+  // Check for the aforementioned conditions in each equality.
+  for (unsigned curEquality = 0, numEqualities = cst.getNumEqualities();
+       curEquality < numEqualities; curEquality++) {
+    int64_t coefficientAtPos = cst.atEq64(curEquality, pos);
+    // If current equality does not involve `var_r`, continue to the next
+    // equality.
+    if (coefficientAtPos == 0)
+      continue;
+
+    // Constant term should be 0 in this equality.
+    if (cst.atEq64(curEquality, cst.getNumCols() - 1) != 0)
+      continue;
+
+    // Traverse through the equality and construct the dividend expression
+    // `dividendExpr`, to contain all the variables which are known and are
+    // not divisible by `(coefficientAtPos * divisor)`. Hope here is that the
+    // `dividendExpr` gets simplified into a single variable `var_n` discussed
+    // above.
+    auto dividendExpr = getAffineConstantExpr(0, context);
+
+    // Track the terms that go into quotient expression, later used to detect
+    // additional floordiv.
+    unsigned quotientCount = 0;
+    int quotientPosition = -1;
+    int quotientSign = 1;
+
+    // Consider each term in the current equality.
+    unsigned curVar, e;
+    for (curVar = 0, e = cst.getNumDimAndSymbolVars(); curVar < e; ++curVar) {
+      // Ignore var_r.
+      if (curVar == pos)
+        continue;
+      int64_t coefficientOfCurVar = cst.atEq64(curEquality, curVar);
+      // Ignore vars that do not contribute to the current equality.
+      if (coefficientOfCurVar == 0)
+        continue;
+      // Check if the current var goes into the quotient expression.
+      if (coefficientOfCurVar % (divisor * coefficientAtPos) == 0) {
+        quotientCount++;
+        quotientPosition = curVar;
+        quotientSign = (coefficientOfCurVar * coefficientAtPos) > 0 ? 1 : -1;
+        continue;
+      }
+      // Variables that are part of dividendExpr should be known.
+      if (!memo[curVar])
+        break;
+      // Append the current variable to the dividend expression.
+ dividendExpr = dividendExpr + memo[curVar] * coefficientOfCurVar; + } + + // Can't construct expression as it depends on a yet uncomputed var. + if (curVar < e) + continue; + + // Express `var_r` in terms of the other vars collected so far. + if (coefficientAtPos > 0) + dividendExpr = (-dividendExpr).floorDiv(coefficientAtPos); + else + dividendExpr = dividendExpr.floorDiv(-coefficientAtPos); + + // Simplify the expression. + dividendExpr = simplifyAffineExpr(dividendExpr, cst.getNumDimVars(), + cst.getNumSymbolVars()); + // Only if the final dividend expression is just a single var (which we call + // `var_n`), we can proceed. + // TODO: Handle AffineSymbolExpr as well. There is no reason to restrict it + // to dims themselves. + auto dimExpr = dividendExpr.dyn_cast(); + if (!dimExpr) + continue; + + // Express `var_r` as `var_n % divisor` and store the expression in `memo`. + if (quotientCount >= 1) { + auto ub = cst.getConstantBound64(BoundType::UB, dimExpr.getPosition()); + // If `var_n` has an upperbound that is less than the divisor, mod can be + // eliminated altogether. + if (ub && *ub < divisor) + memo[pos] = dimExpr; + else + memo[pos] = dimExpr % divisor; + // If a unique quotient `var_q` was seen, it can be expressed as + // `var_n floordiv divisor`. + if (quotientCount == 1 && !memo[quotientPosition]) + memo[quotientPosition] = dimExpr.floorDiv(divisor) * quotientSign; + + return true; + } + } + return false; +} + +/// Check if the pos^th variable can be expressed as a floordiv of an affine +/// function of other variables (where the divisor is a positive constant) +/// given the initial set of expressions in `exprs`. If it can be, the +/// corresponding position in `exprs` is set as the detected affine expr. For +/// eg: 4q <= i + j <= 4q + 3 <=> q = (i + j) floordiv 4. An equality can +/// also yield a floordiv: eg. 4q = i + j <=> q = (i + j) floordiv 4. 32q + 28 +/// <= i <= 32q + 31 => q = i floordiv 32. +static bool detectAsFloorDiv(const FlatLinearConstraints &cst, unsigned pos, + MLIRContext *context, + SmallVectorImpl &exprs) { + assert(pos < cst.getNumVars() && "invalid position"); + + // Get upper-lower bound pair for this variable. + SmallVector foundRepr(cst.getNumVars(), false); + for (unsigned i = 0, e = cst.getNumVars(); i < e; ++i) + if (exprs[i]) + foundRepr[i] = true; + + SmallVector dividend(cst.getNumCols()); + unsigned divisor; + auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); + + // No upper-lower bound pair found for this var. + if (ulPair.kind == ReprKind::None || ulPair.kind == ReprKind::Equality) + return false; + + // Construct the dividend expression. + auto dividendExpr = getAffineConstantExpr(dividend.back(), context); + for (unsigned c = 0, f = cst.getNumVars(); c < f; c++) + if (dividend[c] != 0) + dividendExpr = dividendExpr + dividend[c] * exprs[c]; + + // Successfully detected the floordiv. 
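+// A self-contained sketch (not part of this change) of the identity that
+// detectAsMod and detectAsFloorDiv rely on: if 0 <= r <= d - 1 and
+// n - d * q = r hold, then r = n mod d and q = n floordiv d. The floorDiv/mod
+// helpers below are local to the example, not MLIR's.
+//
+//   #include <cassert>
+//   #include <cstdint>
+//
+//   int64_t floorDiv(int64_t n, int64_t d) {
+//     // Round toward negative infinity (C++ '/' rounds toward zero).
+//     int64_t q = n / d, r = n % d;
+//     return (r != 0 && ((r < 0) != (d < 0))) ? q - 1 : q;
+//   }
+//
+//   int64_t mod(int64_t n, int64_t d) { return n - d * floorDiv(n, d); }
+//
+//   int main() {
+//     const int64_t d = 4;
+//     for (int64_t n = -10; n <= 10; ++n) {
+//       int64_t q = floorDiv(n, d), r = mod(n, d);
+//       // The two constraints the detection looks for:
+//       assert(0 <= r && r <= d - 1);
+//       assert(n - d * q == r);
+//     }
+//     return 0;
+//   }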
+ exprs[pos] = dividendExpr.floorDiv(divisor); + return true; +} + +std::pair FlatLinearConstraints::getLowerAndUpperBound( + unsigned pos, unsigned offset, unsigned num, unsigned symStartPos, + ArrayRef localExprs, MLIRContext *context, + bool closedUB) const { + assert(pos + offset < getNumDimVars() && "invalid dim start pos"); + assert(symStartPos >= (pos + offset) && "invalid sym start pos"); + assert(getNumLocalVars() == localExprs.size() && + "incorrect local exprs count"); + + SmallVector lbIndices, ubIndices, eqIndices; + getLowerAndUpperBoundIndices(pos + offset, &lbIndices, &ubIndices, &eqIndices, + offset, num); + + /// Add to 'b' from 'a' in set [0, offset) U [offset + num, symbStartPos). + auto addCoeffs = [&](ArrayRef a, SmallVectorImpl &b) { + b.clear(); + for (unsigned i = 0, e = a.size(); i < e; ++i) { + if (i < offset || i >= offset + num) + b.push_back(a[i]); + } + }; + + SmallVector lb, ub; + SmallVector lbExprs; + unsigned dimCount = symStartPos - num; + unsigned symCount = getNumDimAndSymbolVars() - symStartPos; + lbExprs.reserve(lbIndices.size() + eqIndices.size()); + // Lower bound expressions. + for (auto idx : lbIndices) { + auto ineq = getInequality64(idx); + // Extract the lower bound (in terms of other coeff's + const), i.e., if + // i - j + 1 >= 0 is the constraint, 'pos' is for i the lower bound is j + // - 1. + addCoeffs(ineq, lb); + std::transform(lb.begin(), lb.end(), lb.begin(), std::negate()); + auto expr = + getAffineExprFromFlatForm(lb, dimCount, symCount, localExprs, context); + // expr ceildiv divisor is (expr + divisor - 1) floordiv divisor + int64_t divisor = std::abs(ineq[pos + offset]); + expr = (expr + divisor - 1).floorDiv(divisor); + lbExprs.push_back(expr); + } + + SmallVector ubExprs; + ubExprs.reserve(ubIndices.size() + eqIndices.size()); + // Upper bound expressions. + for (auto idx : ubIndices) { + auto ineq = getInequality64(idx); + // Extract the upper bound (in terms of other coeff's + const). + addCoeffs(ineq, ub); + auto expr = + getAffineExprFromFlatForm(ub, dimCount, symCount, localExprs, context); + expr = expr.floorDiv(std::abs(ineq[pos + offset])); + int64_t ubAdjustment = closedUB ? 0 : 1; + ubExprs.push_back(expr + ubAdjustment); + } + + // Equalities. It's both a lower and a upper bound. + SmallVector b; + for (auto idx : eqIndices) { + auto eq = getEquality64(idx); + addCoeffs(eq, b); + if (eq[pos + offset] > 0) + std::transform(b.begin(), b.end(), b.begin(), std::negate()); + + // Extract the upper bound (in terms of other coeff's + const). + auto expr = + getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context); + expr = expr.floorDiv(std::abs(eq[pos + offset])); + // Upper bound is exclusive. + ubExprs.push_back(expr + 1); + // Lower bound. + expr = + getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context); + expr = expr.ceilDiv(std::abs(eq[pos + offset])); + lbExprs.push_back(expr); + } + + auto lbMap = AffineMap::get(dimCount, symCount, lbExprs, context); + auto ubMap = AffineMap::get(dimCount, symCount, ubExprs, context); + + return {lbMap, ubMap}; +} + +/// Computes the lower and upper bounds of the first 'num' dimensional +/// variables (starting at 'offset') as affine maps of the remaining +/// variables (dimensional and symbolic variables). Local variables are +/// themselves explicitly computed as affine functions of other variables in +/// this process if needed. 
+void FlatLinearConstraints::getSliceBounds(unsigned offset, unsigned num, + MLIRContext *context, + SmallVectorImpl *lbMaps, + SmallVectorImpl *ubMaps, + bool closedUB) { + assert(num < getNumDimVars() && "invalid range"); + + // Basic simplification. + normalizeConstraintsByGCD(); + + LLVM_DEBUG(llvm::dbgs() << "getSliceBounds for first " << num + << " variables\n"); + LLVM_DEBUG(dump()); + + // Record computed/detected variables. + SmallVector memo(getNumVars()); + // Initialize dimensional and symbolic variables. + for (unsigned i = 0, e = getNumDimVars(); i < e; i++) { + if (i < offset) + memo[i] = getAffineDimExpr(i, context); + else if (i >= offset + num) + memo[i] = getAffineDimExpr(i - num, context); + } + for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) + memo[i] = getAffineSymbolExpr(i - getNumDimVars(), context); + + bool changed; + do { + changed = false; + // Identify yet unknown variables as constants or mod's / floordiv's of + // other variables if possible. + for (unsigned pos = 0; pos < getNumVars(); pos++) { + if (memo[pos]) + continue; + + auto lbConst = getConstantBound64(BoundType::LB, pos); + auto ubConst = getConstantBound64(BoundType::UB, pos); + if (lbConst.has_value() && ubConst.has_value()) { + // Detect equality to a constant. + if (*lbConst == *ubConst) { + memo[pos] = getAffineConstantExpr(*lbConst, context); + changed = true; + continue; + } + + // Detect a variable as modulo of another variable w.r.t a + // constant. + if (detectAsMod(*this, pos, *lbConst, *ubConst, memo, context)) { + changed = true; + continue; + } + } + + // Detect a variable as a floordiv of an affine function of other + // variables (divisor is a positive constant). + if (detectAsFloorDiv(*this, pos, context, memo)) { + changed = true; + continue; + } + + // Detect a variable as an expression of other variables. + unsigned idx; + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { + continue; + } + + // Build AffineExpr solving for variable 'pos' in terms of all others. + auto expr = getAffineConstantExpr(0, context); + unsigned j, e; + for (j = 0, e = getNumVars(); j < e; ++j) { + if (j == pos) + continue; + int64_t c = atEq64(idx, j); + if (c == 0) + continue; + // If any of the involved IDs hasn't been found yet, we can't proceed. + if (!memo[j]) + break; + expr = expr + memo[j] * c; + } + if (j < e) + // Can't construct expression as it depends on a yet uncomputed + // variable. + continue; + + // Add constant term to AffineExpr. + expr = expr + atEq64(idx, getNumVars()); + int64_t vPos = atEq64(idx, pos); + assert(vPos != 0 && "expected non-zero here"); + if (vPos > 0) + expr = (-expr).floorDiv(vPos); + else + // vPos < 0. + expr = expr.floorDiv(-vPos); + // Successfully constructed expression. + memo[pos] = expr; + changed = true; + } + // This loop is guaranteed to reach a fixed point - since once an + // variable's explicit form is computed (in memo[pos]), it's not updated + // again. + } while (changed); + + int64_t ubAdjustment = closedUB ? 0 : 1; + + // Set the lower and upper bound maps for all the variables that were + // computed as affine expressions of the rest as the "detected expr" and + // "detected expr + 1" respectively; set the undetected ones to null. 
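+// An illustrative sketch (not part of this change) of the fixed-point
+// structure used by the detection loop above: repeatedly resolve variables
+// whose defining relation only mentions already-resolved variables; since a
+// resolved entry is never cleared, the number of unresolved entries strictly
+// decreases and the loop terminates. Plain C++ with a toy definition table
+// standing in for the real constraint system.
+//
+//   #include <cstdint>
+//   #include <optional>
+//   #include <utility>
+//   #include <vector>
+//
+//   int main() {
+//     // def[i] = (j, c) means x_i = x_j + c; j == -1 means "given".
+//     std::vector<std::pair<int, int64_t>> def = {{-1, 0}, {0, 5}, {1, 7}};
+//     std::vector<std::optional<int64_t>> memo(def.size());
+//     memo[0] = 3; // the given variable
+//
+//     bool changed;
+//     do {
+//       changed = false;
+//       for (size_t i = 0; i < def.size(); ++i) {
+//         if (memo[i] || def[i].first < 0)
+//           continue;
+//         if (auto base = memo[def[i].first]) { // dependency resolved?
+//           memo[i] = *base + def[i].second;
+//           changed = true;
+//         }
+//       }
+//     } while (changed);
+//     // memo is now {3, 8, 15}; each entry was filled exactly once.
+//     return 0;
+//   }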
+ std::optional tmpClone; + for (unsigned pos = 0; pos < num; pos++) { + unsigned numMapDims = getNumDimVars() - num; + unsigned numMapSymbols = getNumSymbolVars(); + AffineExpr expr = memo[pos + offset]; + if (expr) + expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols); + + AffineMap &lbMap = (*lbMaps)[pos]; + AffineMap &ubMap = (*ubMaps)[pos]; + + if (expr) { + lbMap = AffineMap::get(numMapDims, numMapSymbols, expr); + ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment); + } else { + // TODO: Whenever there are local variables in the dependence + // constraints, we'll conservatively over-approximate, since we don't + // always explicitly compute them above (in the while loop). + if (getNumLocalVars() == 0) { + // Work on a copy so that we don't update this constraint system. + if (!tmpClone) { + tmpClone.emplace(FlatLinearConstraints(*this)); + // Removing redundant inequalities is necessary so that we don't get + // redundant loop bounds. + tmpClone->removeRedundantInequalities(); + } + std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound( + pos, offset, num, getNumDimVars(), /*localExprs=*/{}, context, + closedUB); + } + + // If the above fails, we'll just use the constant lower bound and the + // constant upper bound (if they exist) as the slice bounds. + // TODO: being conservative for the moment in cases that + // lead to multiple bounds - until getConstDifference in LoopFusion.cpp is + // fixed (b/126426796). + if (!lbMap || lbMap.getNumResults() > 1) { + LLVM_DEBUG(llvm::dbgs() + << "WARNING: Potentially over-approximating slice lb\n"); + auto lbConst = getConstantBound64(BoundType::LB, pos + offset); + if (lbConst.has_value()) { + lbMap = AffineMap::get(numMapDims, numMapSymbols, + getAffineConstantExpr(*lbConst, context)); + } + } + if (!ubMap || ubMap.getNumResults() > 1) { + LLVM_DEBUG(llvm::dbgs() + << "WARNING: Potentially over-approximating slice ub\n"); + auto ubConst = getConstantBound64(BoundType::UB, pos + offset); + if (ubConst.has_value()) { + ubMap = AffineMap::get( + numMapDims, numMapSymbols, + getAffineConstantExpr(*ubConst + ubAdjustment, context)); + } + } + } + LLVM_DEBUG(llvm::dbgs() + << "lb map for pos = " << Twine(pos + offset) << ", expr: "); + LLVM_DEBUG(lbMap.dump();); + LLVM_DEBUG(llvm::dbgs() + << "ub map for pos = " << Twine(pos + offset) << ", expr: "); + LLVM_DEBUG(ubMap.dump();); + } +} + +LogicalResult FlatLinearConstraints::flattenAlignedMapAndMergeLocals( + AffineMap map, std::vector> *flattenedExprs) { + FlatLinearConstraints localCst; + if (failed(getFlattenedAffineExprs(map, flattenedExprs, &localCst))) { + LLVM_DEBUG(llvm::dbgs() + << "composition unimplemented for semi-affine maps\n"); + return failure(); + } + + // Add localCst information. + if (localCst.getNumLocalVars() > 0) { + unsigned numLocalVars = getNumLocalVars(); + // Insert local dims of localCst at the beginning. + insertLocalVar(/*pos=*/0, /*num=*/localCst.getNumLocalVars()); + // Insert local dims of `this` at the end of localCst. + localCst.appendLocalVar(/*num=*/numLocalVars); + // Dimensions of localCst and this constraint set match. Append localCst to + // this constraint set. 
+ append(localCst); + } + + return success(); +} + +LogicalResult FlatLinearConstraints::addBound(BoundType type, unsigned pos, + AffineMap boundMap, + bool isClosedBound) { + assert(boundMap.getNumDims() == getNumDimVars() && "dim mismatch"); + assert(boundMap.getNumSymbols() == getNumSymbolVars() && "symbol mismatch"); + assert(pos < getNumDimAndSymbolVars() && "invalid position"); + assert((type != BoundType::EQ || isClosedBound) && + "EQ bound must be closed."); + + // Equality follows the logic of lower bound except that we add an equality + // instead of an inequality. + assert((type != BoundType::EQ || boundMap.getNumResults() == 1) && + "single result expected"); + bool lower = type == BoundType::LB || type == BoundType::EQ; + + std::vector> flatExprs; + if (failed(flattenAlignedMapAndMergeLocals(boundMap, &flatExprs))) + return failure(); + assert(flatExprs.size() == boundMap.getNumResults()); + + // Add one (in)equality for each result. + for (const auto &flatExpr : flatExprs) { + SmallVector ineq(getNumCols(), 0); + // Dims and symbols. + for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) { + ineq[j] = lower ? -flatExpr[j] : flatExpr[j]; + } + // Invalid bound: pos appears in `boundMap`. + // TODO: This should be an assertion. Fix `addDomainFromSliceMaps` and/or + // its callers to prevent invalid bounds from being added. + if (ineq[pos] != 0) + continue; + ineq[pos] = lower ? 1 : -1; + // Local columns of `ineq` are at the beginning. + unsigned j = getNumDimVars() + getNumSymbolVars(); + unsigned end = flatExpr.size() - 1; + for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) { + ineq[j] = lower ? -flatExpr[i] : flatExpr[i]; + } + // Make the bound closed in if flatExpr is open. The inequality is always + // created in the upper bound form, so the adjustment is -1. + int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1; + // Constant term. + ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1] + : flatExpr[flatExpr.size() - 1]) + + boundAdjustment; + type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq); + } + + return success(); +} + +LogicalResult FlatLinearConstraints::addBound(BoundType type, unsigned pos, + AffineMap boundMap) { + return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB); +} + +/// Compute an explicit representation for local vars. For all systems coming +/// from MLIR integer sets, maps, or expressions where local vars were +/// introduced to model floordivs and mods, this always succeeds. +LogicalResult +FlatLinearConstraints::computeLocalVars(SmallVectorImpl &memo, + MLIRContext *context) const { + unsigned numDims = getNumDimVars(); + unsigned numSyms = getNumSymbolVars(); + + // Initialize dimensional and symbolic variables. + for (unsigned i = 0; i < numDims; i++) + memo[i] = getAffineDimExpr(i, context); + for (unsigned i = numDims, e = numDims + numSyms; i < e; i++) + memo[i] = getAffineSymbolExpr(i - numDims, context); + + bool changed; + do { + // Each time `changed` is true at the end of this iteration, one or more + // local vars would have been detected as floordivs and set in memo; so the + // number of null entries in memo[...] strictly reduces; so this converges. 
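+// A hedged sketch (not part of this change, hypothetical helper name) of the
+// row construction that addBound performs for the special case of a constant
+// bound: a lower bound b on x_p becomes "x_p - b >= 0", and an open upper
+// bound becomes "-x_p + b - 1 >= 0", i.e. the bound is stored in closed form.
+//
+//   #include <cstdint>
+//   #include <vector>
+//
+//   // Builds the coefficient row [vars..., constant] of an ">= 0" inequality
+//   // bounding variable `pos` by the constant `bound`.
+//   std::vector<int64_t> makeConstantBoundRow(unsigned numCols, unsigned pos,
+//                                             int64_t bound, bool isLower,
+//                                             bool isClosed) {
+//     std::vector<int64_t> row(numCols, 0);
+//     row[pos] = isLower ? 1 : -1;
+//     // Open bounds tighten by one so they can be stored as closed bounds.
+//     int64_t adjustment = isClosed ? 0 : -1;
+//     row[numCols - 1] = (isLower ? -bound : bound) + adjustment;
+//     return row;
+//   }
+//
+//   // Example: x_0 < 10 over columns (x_0, x_1, const) gives {-1, 0, 9},
+//   // i.e. -x_0 + 9 >= 0, equivalently x_0 <= 9.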
+ changed = false; + for (unsigned i = 0, e = getNumLocalVars(); i < e; ++i) + if (!memo[numDims + numSyms + i] && + detectAsFloorDiv(*this, /*pos=*/numDims + numSyms + i, context, memo)) + changed = true; + } while (changed); + + ArrayRef localExprs = + ArrayRef(memo).take_back(getNumLocalVars()); + return success( + llvm::all_of(localExprs, [](AffineExpr expr) { return expr; })); +} + +IntegerSet FlatLinearConstraints::getAsIntegerSet(MLIRContext *context) const { + if (getNumConstraints() == 0) + // Return universal set (always true): 0 == 0. + return IntegerSet::get(getNumDimVars(), getNumSymbolVars(), + getAffineConstantExpr(/*constant=*/0, context), + /*eqFlags=*/true); + + // Construct local references. + SmallVector memo(getNumVars(), AffineExpr()); + + if (failed(computeLocalVars(memo, context))) { + // Check if the local variables without an explicit representation have + // zero coefficients everywhere. + SmallVector noLocalRepVars; + unsigned numDimsSymbols = getNumDimAndSymbolVars(); + for (unsigned i = numDimsSymbols, e = getNumVars(); i < e; ++i) { + if (!memo[i] && !isColZero(/*pos=*/i)) + noLocalRepVars.push_back(i - numDimsSymbols); + } + if (!noLocalRepVars.empty()) { + LLVM_DEBUG({ + llvm::dbgs() << "local variables at position(s) "; + llvm::interleaveComma(noLocalRepVars, llvm::dbgs()); + llvm::dbgs() << " do not have an explicit representation in:\n"; + this->dump(); + }); + return IntegerSet(); + } + } + + ArrayRef localExprs = + ArrayRef(memo).take_back(getNumLocalVars()); + + // Construct the IntegerSet from the equalities/inequalities. + unsigned numDims = getNumDimVars(); + unsigned numSyms = getNumSymbolVars(); + + SmallVector eqFlags(getNumConstraints()); + std::fill(eqFlags.begin(), eqFlags.begin() + getNumEqualities(), true); + std::fill(eqFlags.begin() + getNumEqualities(), eqFlags.end(), false); + + SmallVector exprs; + exprs.reserve(getNumConstraints()); + + for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) + exprs.push_back(getAffineExprFromFlatForm(getEquality64(i), numDims, + numSyms, localExprs, context)); + for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) + exprs.push_back(getAffineExprFromFlatForm(getInequality64(i), numDims, + numSyms, localExprs, context)); + return IntegerSet::get(numDims, numSyms, exprs, eqFlags); +} + +//===----------------------------------------------------------------------===// +// FlatLinearValueConstraints +//===----------------------------------------------------------------------===// + +// Construct from an IntegerSet. +FlatLinearValueConstraints::FlatLinearValueConstraints(IntegerSet set, + ValueRange operands) + : FlatLinearConstraints(set.getNumInequalities(), set.getNumEqualities(), + set.getNumDims() + set.getNumSymbols() + 1, + set.getNumDims(), set.getNumSymbols(), + /*numLocals=*/0) { + // Populate values. + if (operands.empty()) { + values.resize(getNumDimAndSymbolVars(), std::nullopt); + } else { + assert(set.getNumInputs() == operands.size() && "operand count mismatch"); + values.assign(operands.begin(), operands.end()); + } + + // Flatten expressions and add them to the constraint system. 
+ std::vector> flatExprs; + FlatLinearConstraints localVarCst; + if (failed(getFlattenedAffineExprs(set, &flatExprs, &localVarCst))) { + assert(false && "flattening unimplemented for semi-affine integer sets"); + return; + } + assert(flatExprs.size() == set.getNumConstraints()); + insertVar(VarKind::Local, getNumVarKind(VarKind::Local), + /*num=*/localVarCst.getNumLocalVars()); + + for (unsigned i = 0, e = flatExprs.size(); i < e; ++i) { + const auto &flatExpr = flatExprs[i]; + assert(flatExpr.size() == getNumCols()); + if (set.getEqFlags()[i]) { + addEquality(flatExpr); + } else { + addInequality(flatExpr); + } + } + // Add the other constraints involving local vars from flattening. + append(localVarCst); +} + +// Construct a hyperrectangular constraint set from ValueRanges that represent +// induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are +// expected to match one to one. The order of variables and constraints is: +// +// ivs | lbs | ubs | eq/ineq +// ----+-----+-----+--------- +// 1 -1 0 >= 0 +// ----+-----+-----+--------- +// -1 0 1 >= 0 +// +// All dimensions as set as VarKind::SetDim. +FlatLinearValueConstraints +FlatLinearValueConstraints::getHyperrectangular(ValueRange ivs, ValueRange lbs, + ValueRange ubs) { + FlatLinearValueConstraints res; + unsigned nIvs = ivs.size(); + assert(nIvs == lbs.size() && "expected as many lower bounds as ivs"); + assert(nIvs == ubs.size() && "expected as many upper bounds as ivs"); + + if (nIvs == 0) + return res; + + res.appendDimVar(ivs); + unsigned lbsStart = res.appendDimVar(lbs); + unsigned ubsStart = res.appendDimVar(ubs); + + MLIRContext *ctx = ivs.front().getContext(); + for (int ivIdx = 0, e = nIvs; ivIdx < e; ++ivIdx) { + // iv - lb >= 0 + AffineMap lb = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0, + getAffineDimExpr(lbsStart + ivIdx, ctx)); + if (failed(res.addBound(BoundType::LB, ivIdx, lb))) + llvm_unreachable("Unexpected FlatLinearValueConstraints creation error"); + // -iv + ub >= 0 + AffineMap ub = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0, + getAffineDimExpr(ubsStart + ivIdx, ctx)); + if (failed(res.addBound(BoundType::UB, ivIdx, ub))) + llvm_unreachable("Unexpected FlatLinearValueConstraints creation error"); + } + return res; +} + +unsigned FlatLinearValueConstraints::appendDimVar(ValueRange vals) { + unsigned pos = getNumDimVars(); + return insertVar(VarKind::SetDim, pos, vals); +} + +unsigned FlatLinearValueConstraints::appendSymbolVar(ValueRange vals) { + unsigned pos = getNumSymbolVars(); + return insertVar(VarKind::Symbol, pos, vals); +} + +unsigned FlatLinearValueConstraints::insertDimVar(unsigned pos, + ValueRange vals) { + return insertVar(VarKind::SetDim, pos, vals); +} + +unsigned FlatLinearValueConstraints::insertSymbolVar(unsigned pos, + ValueRange vals) { + return insertVar(VarKind::Symbol, pos, vals); +} + +unsigned FlatLinearValueConstraints::insertVar(VarKind kind, unsigned pos, + unsigned num) { + unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num); + + if (kind != VarKind::Local) { + values.insert(values.begin() + absolutePos, num, std::nullopt); + assert(values.size() == getNumDimAndSymbolVars()); + } + + return absolutePos; +} + +unsigned FlatLinearValueConstraints::insertVar(VarKind kind, unsigned pos, + ValueRange vals) { + assert(!vals.empty() && "expected ValueRange with Values."); + assert(kind != VarKind::Local && + "values cannot be attached to local variables."); + unsigned num = vals.size(); + unsigned absolutePos = 
IntegerPolyhedron::insertVar(kind, pos, num); + + // If a Value is provided, insert it; otherwise use None. + for (unsigned i = 0; i < num; ++i) + values.insert(values.begin() + absolutePos + i, + vals[i] ? std::optional(vals[i]) : std::nullopt); + + assert(values.size() == getNumDimAndSymbolVars()); + return absolutePos; +} + +bool FlatLinearValueConstraints::hasValues() const { + return llvm::any_of( + values, [](const std::optional &var) { return var.has_value(); }); +} + +/// Checks if two constraint systems are in the same space, i.e., if they are +/// associated with the same set of variables, appearing in the same order. +static bool areVarsAligned(const FlatLinearValueConstraints &a, + const FlatLinearValueConstraints &b) { + return a.getNumDimVars() == b.getNumDimVars() && + a.getNumSymbolVars() == b.getNumSymbolVars() && + a.getNumVars() == b.getNumVars() && + a.getMaybeValues().equals(b.getMaybeValues()); +} + +/// Calls areVarsAligned to check if two constraint systems have the same set +/// of variables in the same order. +bool FlatLinearValueConstraints::areVarsAlignedWithOther( + const FlatLinearConstraints &other) { + return areVarsAligned(*this, other); +} + +/// Checks if the SSA values associated with `cst`'s variables in range +/// [start, end) are unique. +static bool LLVM_ATTRIBUTE_UNUSED areVarsUnique( + const FlatLinearValueConstraints &cst, unsigned start, unsigned end) { + + assert(start <= cst.getNumDimAndSymbolVars() && + "Start position out of bounds"); + assert(end <= cst.getNumDimAndSymbolVars() && "End position out of bounds"); + + if (start >= end) + return true; + + SmallPtrSet uniqueVars; + ArrayRef> maybeValues = + cst.getMaybeValues().slice(start, end - start); + for (std::optional val : maybeValues) { + if (val && !uniqueVars.insert(*val).second) + return false; + } + return true; +} + +/// Checks if the SSA values associated with `cst`'s variables are unique. +static bool LLVM_ATTRIBUTE_UNUSED +areVarsUnique(const FlatLinearValueConstraints &cst) { + return areVarsUnique(cst, 0, cst.getNumDimAndSymbolVars()); +} + +/// Checks if the SSA values associated with `cst`'s variables of kind `kind` +/// are unique. +static bool LLVM_ATTRIBUTE_UNUSED +areVarsUnique(const FlatLinearValueConstraints &cst, VarKind kind) { + + if (kind == VarKind::SetDim) + return areVarsUnique(cst, 0, cst.getNumDimVars()); + if (kind == VarKind::Symbol) + return areVarsUnique(cst, cst.getNumDimVars(), + cst.getNumDimAndSymbolVars()); + llvm_unreachable("Unexpected VarKind"); +} + +/// Merge and align the variables of A and B starting at 'offset', so that +/// both constraint systems get the union of the contained variables that is +/// dimension-wise and symbol-wise unique; both constraint systems are updated +/// so that they have the union of all variables, with A's original +/// variables appearing first followed by any of B's variables that didn't +/// appear in A. Local variables in B that have the same division +/// representation as local variables in A are merged into one. +// E.g.: Input: A has ((%i, %j) [%M, %N]) and B has (%k, %j) [%P, %N, %M]) +// Output: both A, B have (%i, %j, %k) [%M, %N, %P] +static void mergeAndAlignVars(unsigned offset, FlatLinearValueConstraints *a, + FlatLinearValueConstraints *b) { + assert(offset <= a->getNumDimVars() && offset <= b->getNumDimVars()); + // A merge/align isn't meaningful if a cst's vars aren't distinct. 
+  assert(areVarsUnique(*a) && "A's values aren't unique");
+  assert(areVarsUnique(*b) && "B's values aren't unique");
+
+  assert(llvm::all_of(
+      llvm::drop_begin(a->getMaybeValues(), offset),
+      [](const std::optional &var) { return var.has_value(); }));
+
+  assert(llvm::all_of(
+      llvm::drop_begin(b->getMaybeValues(), offset),
+      [](const std::optional &var) { return var.has_value(); }));
+
+  SmallVector aDimValues;
+  a->getValues(offset, a->getNumDimVars(), &aDimValues);
+
+  {
+    // Merge dims from A into B.
+    unsigned d = offset;
+    for (auto aDimValue : aDimValues) {
+      unsigned loc;
+      if (b->findVar(aDimValue, &loc)) {
+        assert(loc >= offset && "A's dim appears in B's aligned range");
+        assert(loc < b->getNumDimVars() &&
+               "A's dim appears in B's non-dim position");
+        b->swapVar(d, loc);
+      } else {
+        b->insertDimVar(d, aDimValue);
+      }
+      d++;
+    }
+    // Dimensions that are in B, but not in A, are added at the end.
+    for (unsigned t = a->getNumDimVars(), e = b->getNumDimVars(); t < e; t++) {
+      a->appendDimVar(b->getValue(t));
+    }
+    assert(a->getNumDimVars() == b->getNumDimVars() &&
+           "expected same number of dims");
+  }
+
+  // Merge and align symbols of A and B.
+  a->mergeSymbolVars(*b);
+  // Merge and align locals of A and B.
+  a->mergeLocalVars(*b);
+
+  assert(areVarsAligned(*a, *b) && "IDs expected to be aligned");
+}
+
+// Call 'mergeAndAlignVars' to align constraint systems of 'this' and 'other'.
+void FlatLinearValueConstraints::mergeAndAlignVarsWithOther(
+    unsigned offset, FlatLinearValueConstraints *other) {
+  mergeAndAlignVars(offset, this, other);
+}
+
+/// Merge and align symbols of `this` and `other` such that both get the union
+/// of symbols that are unique. Symbols in `this` and `other` should be
+/// unique. Symbols with Value as `None` are considered to be unequal to all
+/// other symbols.
+void FlatLinearValueConstraints::mergeSymbolVars(
+    FlatLinearValueConstraints &other) {
+
+  assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique");
+  assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique");
+
+  SmallVector aSymValues;
+  getValues(getNumDimVars(), getNumDimAndSymbolVars(), &aSymValues);
+
+  // Merge symbols: merge symbols into `other` first from `this`.
+  unsigned s = other.getNumDimVars();
+  for (Value aSymValue : aSymValues) {
+    unsigned loc;
+    // If the var is a symbol in `other`, then align it, otherwise assume that
+    // it is a new symbol.
+    if (other.findVar(aSymValue, &loc) && loc >= other.getNumDimVars() &&
+        loc < other.getNumDimAndSymbolVars())
+      other.swapVar(s, loc);
+    else
+      other.insertSymbolVar(s - other.getNumDimVars(), aSymValue);
+    s++;
+  }
+
+  // Symbols that are in other, but not in this, are added at the end.
+ for (unsigned t = other.getNumDimVars() + getNumSymbolVars(), + e = other.getNumDimAndSymbolVars(); + t < e; t++) + insertSymbolVar(getNumSymbolVars(), other.getValue(t)); + + assert(getNumSymbolVars() == other.getNumSymbolVars() && + "expected same number of symbols"); + assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique"); + assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique"); +} + +bool FlatLinearValueConstraints::hasConsistentState() const { + return IntegerPolyhedron::hasConsistentState() && + values.size() == getNumDimAndSymbolVars(); +} + +void FlatLinearValueConstraints::removeVarRange(VarKind kind, unsigned varStart, + unsigned varLimit) { + IntegerPolyhedron::removeVarRange(kind, varStart, varLimit); + unsigned offset = getVarKindOffset(kind); + + if (kind != VarKind::Local) { + values.erase(values.begin() + varStart + offset, + values.begin() + varLimit + offset); + } +} + +AffineMap +FlatLinearValueConstraints::computeAlignedMap(AffineMap map, + ValueRange operands) const { + assert(map.getNumInputs() == operands.size() && "number of inputs mismatch"); + + SmallVector dims, syms; +#ifndef NDEBUG + SmallVector newSyms; + SmallVector *newSymsPtr = &newSyms; +#else + SmallVector *newSymsPtr = nullptr; +#endif // NDEBUG + + dims.reserve(getNumDimVars()); + syms.reserve(getNumSymbolVars()); + for (unsigned i = getVarKindOffset(VarKind::SetDim), + e = getVarKindEnd(VarKind::SetDim); + i < e; ++i) + dims.push_back(values[i] ? *values[i] : Value()); + for (unsigned i = getVarKindOffset(VarKind::Symbol), + e = getVarKindEnd(VarKind::Symbol); + i < e; ++i) + syms.push_back(values[i] ? *values[i] : Value()); + + AffineMap alignedMap = + alignAffineMapWithValues(map, operands, dims, syms, newSymsPtr); + // All symbols are already part of this FlatAffineValueConstraints. + assert(syms.size() == newSymsPtr->size() && "unexpected new/missing symbols"); + assert(std::equal(syms.begin(), syms.end(), newSymsPtr->begin()) && + "unexpected new/missing symbols"); + return alignedMap; +} + +bool FlatLinearValueConstraints::findVar(Value val, unsigned *pos) const { + unsigned i = 0; + for (const auto &mayBeVar : values) { + if (mayBeVar && *mayBeVar == val) { + *pos = i; + return true; + } + i++; + } + return false; +} + +bool FlatLinearValueConstraints::containsVar(Value val) const { + return llvm::any_of(values, [&](const std::optional &mayBeVar) { + return mayBeVar && *mayBeVar == val; + }); +} + +void FlatLinearValueConstraints::swapVar(unsigned posA, unsigned posB) { + IntegerPolyhedron::swapVar(posA, posB); + + if (getVarKindAt(posA) == VarKind::Local && + getVarKindAt(posB) == VarKind::Local) + return; + + // Treat value of a local variable as std::nullopt. + if (getVarKindAt(posA) == VarKind::Local) + values[posB] = std::nullopt; + else if (getVarKindAt(posB) == VarKind::Local) + values[posA] = std::nullopt; + else + std::swap(values[posA], values[posB]); +} + +void FlatLinearValueConstraints::addBound(BoundType type, Value val, + int64_t value) { + unsigned pos; + if (!findVar(val, &pos)) + // This is a pre-condition for this method. 
+ assert(0 && "var not found"); + addBound(type, pos, value); +} + +void FlatLinearConstraints::printSpace(raw_ostream &os) const { + IntegerPolyhedron::printSpace(os); + os << "("; + for (unsigned i = 0, e = getNumDimAndSymbolVars(); i < e; i++) + os << "None\t"; + for (unsigned i = getVarKindOffset(VarKind::Local), + e = getVarKindEnd(VarKind::Local); + i < e; ++i) + os << "Local\t"; + os << "const)\n"; +} + +void FlatLinearValueConstraints::printSpace(raw_ostream &os) const { + IntegerPolyhedron::printSpace(os); + os << "("; + for (unsigned i = 0, e = getNumDimAndSymbolVars(); i < e; i++) { + if (hasValue(i)) + os << "Value\t"; + else + os << "None\t"; + } + for (unsigned i = getVarKindOffset(VarKind::Local), + e = getVarKindEnd(VarKind::Local); + i < e; ++i) + os << "Local\t"; + os << "const)\n"; +} + +void FlatLinearValueConstraints::clearAndCopyFrom( + const IntegerRelation &other) { + + if (auto *otherValueSet = + dyn_cast(&other)) { + *this = *otherValueSet; + } else { + *static_cast(this) = other; + values.clear(); + values.resize(getNumDimAndSymbolVars(), std::nullopt); + } +} + +void FlatLinearValueConstraints::fourierMotzkinEliminate( + unsigned pos, bool darkShadow, bool *isResultIntegerExact) { + SmallVector, 8> newVals = values; + if (getVarKindAt(pos) != VarKind::Local) + newVals.erase(newVals.begin() + pos); + // Note: Base implementation discards all associated Values. + IntegerPolyhedron::fourierMotzkinEliminate(pos, darkShadow, + isResultIntegerExact); + values = newVals; + assert(values.size() == getNumDimAndSymbolVars()); +} + +void FlatLinearValueConstraints::projectOut(Value val) { + unsigned pos; + bool ret = findVar(val, &pos); + assert(ret); + (void)ret; + fourierMotzkinEliminate(pos); +} + +LogicalResult FlatLinearValueConstraints::unionBoundingBox( + const FlatLinearValueConstraints &otherCst) { + assert(otherCst.getNumDimVars() == getNumDimVars() && "dims mismatch"); + assert(otherCst.getMaybeValues() + .slice(0, getNumDimVars()) + .equals(getMaybeValues().slice(0, getNumDimVars())) && + "dim values mismatch"); + assert(otherCst.getNumLocalVars() == 0 && "local vars not supported here"); + assert(getNumLocalVars() == 0 && "local vars not supported yet here"); + + // Align `other` to this. + if (!areVarsAligned(*this, otherCst)) { + FlatLinearValueConstraints otherCopy(otherCst); + mergeAndAlignVars(/*offset=*/getNumDimVars(), this, &otherCopy); + return IntegerPolyhedron::unionBoundingBox(otherCopy); + } + + return IntegerPolyhedron::unionBoundingBox(otherCst); +} + +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +AffineMap mlir::alignAffineMapWithValues(AffineMap map, ValueRange operands, + ValueRange dims, ValueRange syms, + SmallVector *newSyms) { + assert(operands.size() == map.getNumInputs() && + "expected same number of operands and map inputs"); + MLIRContext *ctx = map.getContext(); + Builder builder(ctx); + SmallVector dimReplacements(map.getNumDims(), {}); + unsigned numSymbols = syms.size(); + SmallVector symReplacements(map.getNumSymbols(), {}); + if (newSyms) { + newSyms->clear(); + newSyms->append(syms.begin(), syms.end()); + } + + for (const auto &operand : llvm::enumerate(operands)) { + // Compute replacement dim/sym of operand. 
+ AffineExpr replacement; + auto dimIt = std::find(dims.begin(), dims.end(), operand.value()); + auto symIt = std::find(syms.begin(), syms.end(), operand.value()); + if (dimIt != dims.end()) { + replacement = + builder.getAffineDimExpr(std::distance(dims.begin(), dimIt)); + } else if (symIt != syms.end()) { + replacement = + builder.getAffineSymbolExpr(std::distance(syms.begin(), symIt)); + } else { + // This operand is neither a dimension nor a symbol. Add it as a new + // symbol. + replacement = builder.getAffineSymbolExpr(numSymbols++); + if (newSyms) + newSyms->push_back(operand.value()); + } + // Add to corresponding replacements vector. + if (operand.index() < map.getNumDims()) { + dimReplacements[operand.index()] = replacement; + } else { + symReplacements[operand.index() - map.getNumDims()] = replacement; + } + } + + return map.replaceDimsAndSymbols(dimReplacements, symReplacements, + dims.size(), numSymbols); +} + +LogicalResult +mlir::getMultiAffineFunctionFromMap(AffineMap map, + MultiAffineFunction &multiAff) { + FlatLinearConstraints cst; + std::vector> flattenedExprs; + LogicalResult result = getFlattenedAffineExprs(map, &flattenedExprs, &cst); + + if (result.failed()) + return failure(); + + DivisionRepr divs = cst.getLocalReprs(); + assert(divs.hasAllReprs() && + "AffineMap cannot produce divs without local representation"); + + // TODO: We shouldn't have to do this conversion. + Matrix mat(map.getNumResults(), map.getNumInputs() + divs.getNumDivs() + 1); + for (unsigned i = 0, e = flattenedExprs.size(); i < e; ++i) + for (unsigned j = 0, f = flattenedExprs[i].size(); j < f; ++j) + mat(i, j) = flattenedExprs[i][j]; + + multiAff = MultiAffineFunction( + PresburgerSpace::getRelationSpace(map.getNumDims(), map.getNumResults(), + map.getNumSymbols(), divs.getNumDivs()), + mat, divs); + + return success(); +} diff --git a/mlir/lib/Analysis/Presburger/PWMAFunction.cpp b/mlir/lib/Analysis/Presburger/PWMAFunction.cpp index 64b9ba6bf7a0e..ce9e810069c48 100644 --- a/mlir/lib/Analysis/Presburger/PWMAFunction.cpp +++ b/mlir/lib/Analysis/Presburger/PWMAFunction.cpp @@ -231,14 +231,14 @@ MultiAffineFunction::getLexSet(OrderingKind comp, // outA - outB <= -1 // outA <= outB - 1 // outA < outB - levelSet.addBound(IntegerPolyhedron::BoundType::UB, subExpr, MPInt(-1)); + levelSet.addBound(BoundType::UB, subExpr, MPInt(-1)); break; case OrderingKind::GT: // For greater than, we add a lower bound of 1: // outA - outB >= 1 // outA > outB + 1 // outA > outB - levelSet.addBound(IntegerPolyhedron::BoundType::LB, subExpr, MPInt(1)); + levelSet.addBound(BoundType::LB, subExpr, MPInt(1)); break; case OrderingKind::GE: case OrderingKind::LE: diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 271a09539e46e..be24f5ee5feb4 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -471,11 +471,6 @@ createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args, } if (arith::FPToSIOp::areCastCompatible(srcTy, dstTy)) { - auto zero = rewriter.create( - loc, rewriter.getF32FloatAttr(0.0f)); - auto half = rewriter.create( - loc, rewriter.getF32FloatAttr(0.5f)); - auto intMin = rewriter.create( loc, rewriter.getF32FloatAttr( APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) @@ -486,12 +481,7 @@ createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args, APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue())); - auto added = 
rewriter.create(loc, args[0], half); - auto subbed = rewriter.create(loc, args[0], half); - auto negative = rewriter.create( - loc, arith::CmpFPredicate::OLT, args[0], zero); - auto rounded = - rewriter.create(loc, negative, subbed, added); + auto rounded = rewriter.create(loc, args[0]); auto clamped = clampFloatHelper(loc, rounded, intMin, intMax, rewriter); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index c56d03f6f31d7..05def0f45d7fb 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/TypeUtilities.h" diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index fb544df18324b..3f1b107f6f8e0 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -19,6 +19,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/Dialect/X86Vector/Transforms.h" #include "mlir/Dialect/X86Vector/X86VectorDialect.h" @@ -64,10 +65,11 @@ void LowerVectorToLLVMPass::runOnOperation() { RewritePatternSet patterns(&getContext()); populateVectorToVectorCanonicalizationPatterns(patterns); populateVectorBroadcastLoweringPatterns(patterns); - populateVectorContractLoweringPatterns(patterns); + populateVectorContractLoweringPatterns(patterns, VectorTransformsOptions()); populateVectorMaskOpLoweringPatterns(patterns); populateVectorShapeCastLoweringPatterns(patterns); - populateVectorTransposeLoweringPatterns(patterns); + populateVectorTransposeLoweringPatterns(patterns, + VectorTransformsOptions()); // Vector transfer ops with rank > 1 should be lowered with VectorToSCF. 
populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index d8070b34a761d..ec2e2aa4c0624 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" @@ -20,6 +20,7 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/IR/Builders.h" #include "mlir/IR/ImplicitLocOpBuilder.h" diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp index d7720a052e0dd..da8f0883d7d5d 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -445,12 +445,10 @@ static void computeDirectionVector( dependenceComponents->resize(numCommonLoops); for (unsigned j = 0; j < numCommonLoops; ++j) { (*dependenceComponents)[j].op = commonLoops[j].getOperation(); - auto lbConst = - dependenceDomain->getConstantBound64(IntegerPolyhedron::LB, j); + auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j); (*dependenceComponents)[j].lb = lbConst.value_or(std::numeric_limits::min()); - auto ubConst = - dependenceDomain->getConstantBound64(IntegerPolyhedron::UB, j); + auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j); (*dependenceComponents)[j].ub = ubConst.value_or(std::numeric_limits::max()); } diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp index 03b8b1d72a5fa..f087dca20f34c 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp @@ -33,504 +33,6 @@ using namespace mlir; using namespace presburger; -namespace { - -// See comments for SimpleAffineExprFlattener. -// An AffineExprFlattener extends a SimpleAffineExprFlattener by recording -// constraint information associated with mod's, floordiv's, and ceildiv's -// in FlatAffineValueConstraints 'localVarCst'. -struct AffineExprFlattener : public SimpleAffineExprFlattener { -public: - // Constraints connecting newly introduced local variables (for mod's and - // div's) to existing (dimensional and symbolic) ones. These are always - // inequalities. - IntegerPolyhedron localVarCst; - - AffineExprFlattener(unsigned nDims, unsigned nSymbols) - : SimpleAffineExprFlattener(nDims, nSymbols), - localVarCst(PresburgerSpace::getSetSpace(nDims, nSymbols)) {} - -private: - // Add a local variable (needed to flatten a mod, floordiv, ceildiv expr). - // The local variable added is always a floordiv of a pure add/mul affine - // function of other variables, coefficients of which are specified in - // `dividend' and with respect to the positive constant `divisor'. localExpr - // is the simplified tree expression (AffineExpr) corresponding to the - // quantifier. 
- void addLocalFloorDivId(ArrayRef dividend, int64_t divisor, - AffineExpr localExpr) override { - SimpleAffineExprFlattener::addLocalFloorDivId(dividend, divisor, localExpr); - // Update localVarCst. - localVarCst.addLocalFloorDiv(dividend, divisor); - } -}; - -} // namespace - -// Flattens the expressions in map. Returns failure if 'expr' was unable to be -// flattened (i.e., semi-affine expressions not handled yet). -static LogicalResult -getFlattenedAffineExprs(ArrayRef exprs, unsigned numDims, - unsigned numSymbols, - std::vector> *flattenedExprs, - FlatAffineValueConstraints *localVarCst) { - if (exprs.empty()) { - if (localVarCst) - *localVarCst = FlatAffineValueConstraints(numDims, numSymbols); - return success(); - } - - AffineExprFlattener flattener(numDims, numSymbols); - // Use the same flattener to simplify each expression successively. This way - // local variables / expressions are shared. - for (auto expr : exprs) { - if (!expr.isPureAffine()) - return failure(); - - flattener.walkPostOrder(expr); - } - - assert(flattener.operandExprStack.size() == exprs.size()); - flattenedExprs->clear(); - flattenedExprs->assign(flattener.operandExprStack.begin(), - flattener.operandExprStack.end()); - - if (localVarCst) - localVarCst->clearAndCopyFrom(flattener.localVarCst); - - return success(); -} - -// Flattens 'expr' into 'flattenedExpr'. Returns failure if 'expr' was unable to -// be flattened (semi-affine expressions not handled yet). -LogicalResult -mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims, - unsigned numSymbols, - SmallVectorImpl *flattenedExpr, - FlatAffineValueConstraints *localVarCst) { - std::vector> flattenedExprs; - LogicalResult ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols, - &flattenedExprs, localVarCst); - *flattenedExpr = flattenedExprs[0]; - return ret; -} - -/// Flattens the expressions in map. Returns failure if 'expr' was unable to be -/// flattened (i.e., semi-affine expressions not handled yet). -LogicalResult mlir::getFlattenedAffineExprs( - AffineMap map, std::vector> *flattenedExprs, - FlatAffineValueConstraints *localVarCst) { - if (map.getNumResults() == 0) { - if (localVarCst) - *localVarCst = - FlatAffineValueConstraints(map.getNumDims(), map.getNumSymbols()); - return success(); - } - return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(), - map.getNumSymbols(), flattenedExprs, - localVarCst); -} - -LogicalResult mlir::getFlattenedAffineExprs( - IntegerSet set, std::vector> *flattenedExprs, - FlatAffineValueConstraints *localVarCst) { - if (set.getNumConstraints() == 0) { - if (localVarCst) - *localVarCst = - FlatAffineValueConstraints(set.getNumDims(), set.getNumSymbols()); - return success(); - } - return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(), - set.getNumSymbols(), flattenedExprs, - localVarCst); -} - -//===----------------------------------------------------------------------===// -// FlatAffineConstraints / FlatAffineValueConstraints. -//===----------------------------------------------------------------------===// - -std::unique_ptr -FlatAffineValueConstraints::clone() const { - return std::make_unique(*this); -} - -// Construct from an IntegerSet. -FlatAffineValueConstraints::FlatAffineValueConstraints(IntegerSet set, - ValueRange operands) - : IntegerPolyhedron(set.getNumInequalities(), set.getNumEqualities(), - set.getNumDims() + set.getNumSymbols() + 1, - PresburgerSpace::getSetSpace(set.getNumDims(), - set.getNumSymbols(), - /*numLocals=*/0)) { - // Populate values. 
- if (operands.empty()) { - values.resize(getNumDimAndSymbolVars(), std::nullopt); - } else { - assert(set.getNumInputs() == operands.size() && "operand count mismatch"); - values.assign(operands.begin(), operands.end()); - } - - // Flatten expressions and add them to the constraint system. - std::vector> flatExprs; - FlatAffineValueConstraints localVarCst; - if (failed(getFlattenedAffineExprs(set, &flatExprs, &localVarCst))) { - assert(false && "flattening unimplemented for semi-affine integer sets"); - return; - } - assert(flatExprs.size() == set.getNumConstraints()); - insertVar(VarKind::Local, getNumVarKind(VarKind::Local), - /*num=*/localVarCst.getNumLocalVars()); - - for (unsigned i = 0, e = flatExprs.size(); i < e; ++i) { - const auto &flatExpr = flatExprs[i]; - assert(flatExpr.size() == getNumCols()); - if (set.getEqFlags()[i]) { - addEquality(flatExpr); - } else { - addInequality(flatExpr); - } - } - // Add the other constraints involving local vars from flattening. - append(localVarCst); -} - -// Construct a hyperrectangular constraint set from ValueRanges that represent -// induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are -// expected to match one to one. The order of variables and constraints is: -// -// ivs | lbs | ubs | eq/ineq -// ----+-----+-----+--------- -// 1 -1 0 >= 0 -// ----+-----+-----+--------- -// -1 0 1 >= 0 -// -// All dimensions as set as VarKind::SetDim. -FlatAffineValueConstraints -FlatAffineValueConstraints::getHyperrectangular(ValueRange ivs, ValueRange lbs, - ValueRange ubs) { - FlatAffineValueConstraints res; - unsigned nIvs = ivs.size(); - assert(nIvs == lbs.size() && "expected as many lower bounds as ivs"); - assert(nIvs == ubs.size() && "expected as many upper bounds as ivs"); - - if (nIvs == 0) - return res; - - res.appendDimVar(ivs); - unsigned lbsStart = res.appendDimVar(lbs); - unsigned ubsStart = res.appendDimVar(ubs); - - MLIRContext *ctx = ivs.front().getContext(); - for (int ivIdx = 0, e = nIvs; ivIdx < e; ++ivIdx) { - // iv - lb >= 0 - AffineMap lb = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0, - getAffineDimExpr(lbsStart + ivIdx, ctx)); - if (failed(res.addBound(BoundType::LB, ivIdx, lb))) - llvm_unreachable("Unexpected FlatAffineValueConstraints creation error"); - // -iv + ub >= 0 - AffineMap ub = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0, - getAffineDimExpr(ubsStart + ivIdx, ctx)); - if (failed(res.addBound(BoundType::UB, ivIdx, ub))) - llvm_unreachable("Unexpected FlatAffineValueConstraints creation error"); - } - return res; -} - -unsigned FlatAffineValueConstraints::appendDimVar(ValueRange vals) { - unsigned pos = getNumDimVars(); - return insertVar(VarKind::SetDim, pos, vals); -} - -unsigned FlatAffineValueConstraints::appendSymbolVar(ValueRange vals) { - unsigned pos = getNumSymbolVars(); - return insertVar(VarKind::Symbol, pos, vals); -} - -unsigned FlatAffineValueConstraints::insertDimVar(unsigned pos, - ValueRange vals) { - return insertVar(VarKind::SetDim, pos, vals); -} - -unsigned FlatAffineValueConstraints::insertSymbolVar(unsigned pos, - ValueRange vals) { - return insertVar(VarKind::Symbol, pos, vals); -} - -unsigned FlatAffineValueConstraints::insertVar(VarKind kind, unsigned pos, - unsigned num) { - unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num); - - if (kind != VarKind::Local) { - values.insert(values.begin() + absolutePos, num, std::nullopt); - assert(values.size() == getNumDimAndSymbolVars()); - } - - return absolutePos; -} - -unsigned 
FlatAffineValueConstraints::insertVar(VarKind kind, unsigned pos, - ValueRange vals) { - assert(!vals.empty() && "expected ValueRange with Values."); - assert(kind != VarKind::Local && - "values cannot be attached to local variables."); - unsigned num = vals.size(); - unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num); - - // If a Value is provided, insert it; otherwise use None. - for (unsigned i = 0; i < num; ++i) - values.insert(values.begin() + absolutePos + i, - vals[i] ? std::optional(vals[i]) : std::nullopt); - - assert(values.size() == getNumDimAndSymbolVars()); - return absolutePos; -} - -bool FlatAffineValueConstraints::hasValues() const { - return llvm::any_of( - values, [](const std::optional &var) { return var.has_value(); }); -} - -/// Checks if two constraint systems are in the same space, i.e., if they are -/// associated with the same set of variables, appearing in the same order. -static bool areVarsAligned(const FlatAffineValueConstraints &a, - const FlatAffineValueConstraints &b) { - return a.getNumDimVars() == b.getNumDimVars() && - a.getNumSymbolVars() == b.getNumSymbolVars() && - a.getNumVars() == b.getNumVars() && - a.getMaybeValues().equals(b.getMaybeValues()); -} - -/// Calls areVarsAligned to check if two constraint systems have the same set -/// of variables in the same order. -bool FlatAffineValueConstraints::areVarsAlignedWithOther( - const FlatAffineValueConstraints &other) { - return areVarsAligned(*this, other); -} - -/// Checks if the SSA values associated with `cst`'s variables in range -/// [start, end) are unique. -static bool LLVM_ATTRIBUTE_UNUSED areVarsUnique( - const FlatAffineValueConstraints &cst, unsigned start, unsigned end) { - - assert(start <= cst.getNumDimAndSymbolVars() && - "Start position out of bounds"); - assert(end <= cst.getNumDimAndSymbolVars() && "End position out of bounds"); - - if (start >= end) - return true; - - SmallPtrSet uniqueVars; - ArrayRef> maybeValues = - cst.getMaybeValues().slice(start, end - start); - for (std::optional val : maybeValues) { - if (val && !uniqueVars.insert(*val).second) - return false; - } - return true; -} - -/// Checks if the SSA values associated with `cst`'s variables are unique. -static bool LLVM_ATTRIBUTE_UNUSED -areVarsUnique(const FlatAffineValueConstraints &cst) { - return areVarsUnique(cst, 0, cst.getNumDimAndSymbolVars()); -} - -/// Checks if the SSA values associated with `cst`'s variables of kind `kind` -/// are unique. -static bool LLVM_ATTRIBUTE_UNUSED -areVarsUnique(const FlatAffineValueConstraints &cst, VarKind kind) { - - if (kind == VarKind::SetDim) - return areVarsUnique(cst, 0, cst.getNumDimVars()); - if (kind == VarKind::Symbol) - return areVarsUnique(cst, cst.getNumDimVars(), - cst.getNumDimAndSymbolVars()); - llvm_unreachable("Unexpected VarKind"); -} - -/// Merge and align the variables of A and B starting at 'offset', so that -/// both constraint systems get the union of the contained variables that is -/// dimension-wise and symbol-wise unique; both constraint systems are updated -/// so that they have the union of all variables, with A's original -/// variables appearing first followed by any of B's variables that didn't -/// appear in A. Local variables in B that have the same division -/// representation as local variables in A are merged into one. 
-// E.g.: Input: A has ((%i, %j) [%M, %N]) and B has (%k, %j) [%P, %N, %M]) -// Output: both A, B have (%i, %j, %k) [%M, %N, %P] -static void mergeAndAlignVars(unsigned offset, FlatAffineValueConstraints *a, - FlatAffineValueConstraints *b) { - assert(offset <= a->getNumDimVars() && offset <= b->getNumDimVars()); - // A merge/align isn't meaningful if a cst's vars aren't distinct. - assert(areVarsUnique(*a) && "A's values aren't unique"); - assert(areVarsUnique(*b) && "B's values aren't unique"); - - assert(llvm::all_of( - llvm::drop_begin(a->getMaybeValues(), offset), - [](const std::optional &var) { return var.has_value(); })); - - assert(llvm::all_of( - llvm::drop_begin(b->getMaybeValues(), offset), - [](const std::optional &var) { return var.has_value(); })); - - SmallVector aDimValues; - a->getValues(offset, a->getNumDimVars(), &aDimValues); - - { - // Merge dims from A into B. - unsigned d = offset; - for (auto aDimValue : aDimValues) { - unsigned loc; - if (b->findVar(aDimValue, &loc)) { - assert(loc >= offset && "A's dim appears in B's aligned range"); - assert(loc < b->getNumDimVars() && - "A's dim appears in B's non-dim position"); - b->swapVar(d, loc); - } else { - b->insertDimVar(d, aDimValue); - } - d++; - } - // Dimensions that are in B, but not in A, are added at the end. - for (unsigned t = a->getNumDimVars(), e = b->getNumDimVars(); t < e; t++) { - a->appendDimVar(b->getValue(t)); - } - assert(a->getNumDimVars() == b->getNumDimVars() && - "expected same number of dims"); - } - - // Merge and align symbols of A and B - a->mergeSymbolVars(*b); - // Merge and align locals of A and B - a->mergeLocalVars(*b); - - assert(areVarsAligned(*a, *b) && "IDs expected to be aligned"); -} - -// Call 'mergeAndAlignVars' to align constraint systems of 'this' and 'other'. -void FlatAffineValueConstraints::mergeAndAlignVarsWithOther( - unsigned offset, FlatAffineValueConstraints *other) { - mergeAndAlignVars(offset, this, other); -} - -LogicalResult -FlatAffineValueConstraints::composeMap(const AffineValueMap *vMap) { - return composeMatchingMap( - computeAlignedMap(vMap->getAffineMap(), vMap->getOperands())); -} - -// Similar to `composeMap` except that no Values need be associated with the -// constraint system nor are they looked at -- the dimensions and symbols of -// `other` are expected to correspond 1:1 to `this` system. -LogicalResult FlatAffineValueConstraints::composeMatchingMap(AffineMap other) { - assert(other.getNumDims() == getNumDimVars() && "dim mismatch"); - assert(other.getNumSymbols() == getNumSymbolVars() && "symbol mismatch"); - - std::vector> flatExprs; - if (failed(flattenAlignedMapAndMergeLocals(other, &flatExprs))) - return failure(); - assert(flatExprs.size() == other.getNumResults()); - - // Add dimensions corresponding to the map's results. - insertDimVar(/*pos=*/0, /*num=*/other.getNumResults()); - - // We add one equality for each result connecting the result dim of the map to - // the other variables. - // E.g.: if the expression is 16*i0 + i1, and this is the r^th - // iteration/result of the value map, we are adding the equality: - // d_r - 16*i0 - i1 = 0. Similarly, when flattening (i0 + 1, i0 + 8*i2), we - // add two equalities: d_0 - i0 - 1 == 0, d1 - i0 - 8*i2 == 0. - for (unsigned r = 0, e = flatExprs.size(); r < e; r++) { - const auto &flatExpr = flatExprs[r]; - assert(flatExpr.size() >= other.getNumInputs() + 1); - - SmallVector eqToAdd(getNumCols(), 0); - // Set the coefficient for this result to one. - eqToAdd[r] = 1; - - // Dims and symbols. 
- for (unsigned i = 0, f = other.getNumInputs(); i < f; i++) { - // Negate `eq[r]` since the newly added dimension will be set to this one. - eqToAdd[e + i] = -flatExpr[i]; - } - // Local columns of `eq` are at the beginning. - unsigned j = getNumDimVars() + getNumSymbolVars(); - unsigned end = flatExpr.size() - 1; - for (unsigned i = other.getNumInputs(); i < end; i++, j++) { - eqToAdd[j] = -flatExpr[i]; - } - - // Constant term. - eqToAdd[getNumCols() - 1] = -flatExpr[flatExpr.size() - 1]; - - // Add the equality connecting the result of the map to this constraint set. - addEquality(eqToAdd); - } - - return success(); -} - -// Turn a symbol into a dimension. -static void turnSymbolIntoDim(FlatAffineValueConstraints *cst, Value value) { - unsigned pos; - if (cst->findVar(value, &pos) && pos >= cst->getNumDimVars() && - pos < cst->getNumDimAndSymbolVars()) { - cst->swapVar(pos, cst->getNumDimVars()); - cst->setDimSymbolSeparation(cst->getNumSymbolVars() - 1); - } -} - -/// Merge and align symbols of `this` and `other` such that both get union of -/// of symbols that are unique. Symbols in `this` and `other` should be -/// unique. Symbols with Value as `None` are considered to be inequal to all -/// other symbols. -void FlatAffineValueConstraints::mergeSymbolVars( - FlatAffineValueConstraints &other) { - - assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique"); - assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique"); - - SmallVector aSymValues; - getValues(getNumDimVars(), getNumDimAndSymbolVars(), &aSymValues); - - // Merge symbols: merge symbols into `other` first from `this`. - unsigned s = other.getNumDimVars(); - for (Value aSymValue : aSymValues) { - unsigned loc; - // If the var is a symbol in `other`, then align it, otherwise assume that - // it is a new symbol - if (other.findVar(aSymValue, &loc) && loc >= other.getNumDimVars() && - loc < other.getNumDimAndSymbolVars()) - other.swapVar(s, loc); - else - other.insertSymbolVar(s - other.getNumDimVars(), aSymValue); - s++; - } - - // Symbols that are in other, but not in this, are added at the end. - for (unsigned t = other.getNumDimVars() + getNumSymbolVars(), - e = other.getNumDimAndSymbolVars(); - t < e; t++) - insertSymbolVar(getNumSymbolVars(), other.getValue(t)); - - assert(getNumSymbolVars() == other.getNumSymbolVars() && - "expected same number of symbols"); - assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique"); - assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique"); -} - -// Changes all symbol variables which are loop IVs to dim variables. -void FlatAffineValueConstraints::convertLoopIVSymbolsToDims() { - // Gather all symbols which are loop IVs. - SmallVector loopIVs; - for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) { - if (hasValue(i) && getForInductionVarOwner(getValue(i))) - loopIVs.push_back(getValue(i)); - } - // Turn each symbol in 'loopIVs' into a dim variable. 
- for (auto iv : loopIVs) { - turnSymbolIntoDim(this, iv); - } -} void FlatAffineValueConstraints::addInductionVarOrTerminalSymbol(Value val) { if (containsVar(val)) @@ -709,559 +211,6 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { append(cst); } -bool FlatAffineValueConstraints::hasConsistentState() const { - return IntegerPolyhedron::hasConsistentState() && - values.size() == getNumDimAndSymbolVars(); -} - -void FlatAffineValueConstraints::removeVarRange(VarKind kind, unsigned varStart, - unsigned varLimit) { - IntegerPolyhedron::removeVarRange(kind, varStart, varLimit); - unsigned offset = getVarKindOffset(kind); - - if (kind != VarKind::Local) { - values.erase(values.begin() + varStart + offset, - values.begin() + varLimit + offset); - } -} - -// Determine whether the variable at 'pos' (say var_r) can be expressed as -// modulo of another known variable (say var_n) w.r.t a constant. For example, -// if the following constraints hold true: -// ``` -// 0 <= var_r <= divisor - 1 -// var_n - (divisor * q_expr) = var_r -// ``` -// where `var_n` is a known variable (called dividend), and `q_expr` is an -// `AffineExpr` (called the quotient expression), `var_r` can be written as: -// -// `var_r = var_n mod divisor`. -// -// Additionally, in a special case of the above constaints where `q_expr` is an -// variable itself that is not yet known (say `var_q`), it can be written as a -// floordiv in the following way: -// -// `var_q = var_n floordiv divisor`. -// -// Returns true if the above mod or floordiv are detected, updating 'memo' with -// these new expressions. Returns false otherwise. -static bool detectAsMod(const FlatAffineValueConstraints &cst, unsigned pos, - int64_t lbConst, int64_t ubConst, - SmallVectorImpl &memo, - MLIRContext *context) { - assert(pos < cst.getNumVars() && "invalid position"); - - // Check if a divisor satisfying the condition `0 <= var_r <= divisor - 1` can - // be determined. - if (lbConst != 0 || ubConst < 1) - return false; - int64_t divisor = ubConst + 1; - - // Check for the aforementioned conditions in each equality. - for (unsigned curEquality = 0, numEqualities = cst.getNumEqualities(); - curEquality < numEqualities; curEquality++) { - int64_t coefficientAtPos = cst.atEq64(curEquality, pos); - // If current equality does not involve `var_r`, continue to the next - // equality. - if (coefficientAtPos == 0) - continue; - - // Constant term should be 0 in this equality. - if (cst.atEq64(curEquality, cst.getNumCols() - 1) != 0) - continue; - - // Traverse through the equality and construct the dividend expression - // `dividendExpr`, to contain all the variables which are known and are - // not divisible by `(coefficientAtPos * divisor)`. Hope here is that the - // `dividendExpr` gets simplified into a single variable `var_n` discussed - // above. - auto dividendExpr = getAffineConstantExpr(0, context); - - // Track the terms that go into quotient expression, later used to detect - // additional floordiv. - unsigned quotientCount = 0; - int quotientPosition = -1; - int quotientSign = 1; - - // Consider each term in the current equality. - unsigned curVar, e; - for (curVar = 0, e = cst.getNumDimAndSymbolVars(); curVar < e; ++curVar) { - // Ignore var_r. - if (curVar == pos) - continue; - int64_t coefficientOfCurVar = cst.atEq64(curEquality, curVar); - // Ignore vars that do not contribute to the current equality. - if (coefficientOfCurVar == 0) - continue; - // Check if the current var goes into the quotient expression. 
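// Illustration of the divisibility check below: with divisor = 4 and
// coefficientAtPos = 1, a term 4*q in the current equality is divisible by
// divisor * coefficientAtPos and is therefore routed to the quotient
// expression, while a term such as 3*n (not divisible by 4) is accumulated
// into the dividend expression instead.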
- if (coefficientOfCurVar % (divisor * coefficientAtPos) == 0) { - quotientCount++; - quotientPosition = curVar; - quotientSign = (coefficientOfCurVar * coefficientAtPos) > 0 ? 1 : -1; - continue; - } - // Variables that are part of dividendExpr should be known. - if (!memo[curVar]) - break; - // Append the current variable to the dividend expression. - dividendExpr = dividendExpr + memo[curVar] * coefficientOfCurVar; - } - - // Can't construct expression as it depends on a yet uncomputed var. - if (curVar < e) - continue; - - // Express `var_r` in terms of the other vars collected so far. - if (coefficientAtPos > 0) - dividendExpr = (-dividendExpr).floorDiv(coefficientAtPos); - else - dividendExpr = dividendExpr.floorDiv(-coefficientAtPos); - - // Simplify the expression. - dividendExpr = simplifyAffineExpr(dividendExpr, cst.getNumDimVars(), - cst.getNumSymbolVars()); - // Only if the final dividend expression is just a single var (which we call - // `var_n`), we can proceed. - // TODO: Handle AffineSymbolExpr as well. There is no reason to restrict it - // to dims themselves. - auto dimExpr = dividendExpr.dyn_cast(); - if (!dimExpr) - continue; - - // Express `var_r` as `var_n % divisor` and store the expression in `memo`. - if (quotientCount >= 1) { - auto ub = cst.getConstantBound64( - FlatAffineValueConstraints::BoundType::UB, dimExpr.getPosition()); - // If `var_n` has an upperbound that is less than the divisor, mod can be - // eliminated altogether. - if (ub && *ub < divisor) - memo[pos] = dimExpr; - else - memo[pos] = dimExpr % divisor; - // If a unique quotient `var_q` was seen, it can be expressed as - // `var_n floordiv divisor`. - if (quotientCount == 1 && !memo[quotientPosition]) - memo[quotientPosition] = dimExpr.floorDiv(divisor) * quotientSign; - - return true; - } - } - return false; -} - -/// Check if the pos^th variable can be expressed as a floordiv of an affine -/// function of other variables (where the divisor is a positive constant) -/// given the initial set of expressions in `exprs`. If it can be, the -/// corresponding position in `exprs` is set as the detected affine expr. For -/// eg: 4q <= i + j <= 4q + 3 <=> q = (i + j) floordiv 4. An equality can -/// also yield a floordiv: eg. 4q = i + j <=> q = (i + j) floordiv 4. 32q + 28 -/// <= i <= 32q + 31 => q = i floordiv 32. -static bool detectAsFloorDiv(const FlatAffineValueConstraints &cst, - unsigned pos, MLIRContext *context, - SmallVectorImpl &exprs) { - assert(pos < cst.getNumVars() && "invalid position"); - - // Get upper-lower bound pair for this variable. - SmallVector foundRepr(cst.getNumVars(), false); - for (unsigned i = 0, e = cst.getNumVars(); i < e; ++i) - if (exprs[i]) - foundRepr[i] = true; - - SmallVector dividend(cst.getNumCols()); - unsigned divisor; - auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); - - // No upper-lower bound pair found for this var. - if (ulPair.kind == ReprKind::None || ulPair.kind == ReprKind::Equality) - return false; - - // Construct the dividend expression. - auto dividendExpr = getAffineConstantExpr(dividend.back(), context); - for (unsigned c = 0, f = cst.getNumVars(); c < f; c++) - if (dividend[c] != 0) - dividendExpr = dividendExpr + dividend[c] * exprs[c]; - - // Successfully detected the floordiv. 
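// Worked example of the two detections: from the bound pair
//   32*q <= i <= 32*q + 31
// computeSingleVarRepr finds dividend i and divisor 32, so q is set to
// i floordiv 32 below. Likewise, detectAsMod above turns 0 <= r <= 3 together
// with n - 4*q = r into r = n mod 4 (and q = n floordiv 4 when q was unknown).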
- exprs[pos] = dividendExpr.floorDiv(divisor); - return true; -} - -std::pair -FlatAffineValueConstraints::getLowerAndUpperBound( - unsigned pos, unsigned offset, unsigned num, unsigned symStartPos, - ArrayRef localExprs, MLIRContext *context, - bool closedUB) const { - assert(pos + offset < getNumDimVars() && "invalid dim start pos"); - assert(symStartPos >= (pos + offset) && "invalid sym start pos"); - assert(getNumLocalVars() == localExprs.size() && - "incorrect local exprs count"); - - SmallVector lbIndices, ubIndices, eqIndices; - getLowerAndUpperBoundIndices(pos + offset, &lbIndices, &ubIndices, &eqIndices, - offset, num); - - /// Add to 'b' from 'a' in set [0, offset) U [offset + num, symbStartPos). - auto addCoeffs = [&](ArrayRef a, SmallVectorImpl &b) { - b.clear(); - for (unsigned i = 0, e = a.size(); i < e; ++i) { - if (i < offset || i >= offset + num) - b.push_back(a[i]); - } - }; - - SmallVector lb, ub; - SmallVector lbExprs; - unsigned dimCount = symStartPos - num; - unsigned symCount = getNumDimAndSymbolVars() - symStartPos; - lbExprs.reserve(lbIndices.size() + eqIndices.size()); - // Lower bound expressions. - for (auto idx : lbIndices) { - auto ineq = getInequality64(idx); - // Extract the lower bound (in terms of other coeff's + const), i.e., if - // i - j + 1 >= 0 is the constraint, 'pos' is for i the lower bound is j - // - 1. - addCoeffs(ineq, lb); - std::transform(lb.begin(), lb.end(), lb.begin(), std::negate()); - auto expr = - getAffineExprFromFlatForm(lb, dimCount, symCount, localExprs, context); - // expr ceildiv divisor is (expr + divisor - 1) floordiv divisor - int64_t divisor = std::abs(ineq[pos + offset]); - expr = (expr + divisor - 1).floorDiv(divisor); - lbExprs.push_back(expr); - } - - SmallVector ubExprs; - ubExprs.reserve(ubIndices.size() + eqIndices.size()); - // Upper bound expressions. - for (auto idx : ubIndices) { - auto ineq = getInequality64(idx); - // Extract the upper bound (in terms of other coeff's + const). - addCoeffs(ineq, ub); - auto expr = - getAffineExprFromFlatForm(ub, dimCount, symCount, localExprs, context); - expr = expr.floorDiv(std::abs(ineq[pos + offset])); - int64_t ubAdjustment = closedUB ? 0 : 1; - ubExprs.push_back(expr + ubAdjustment); - } - - // Equalities. It's both a lower and a upper bound. - SmallVector b; - for (auto idx : eqIndices) { - auto eq = getEquality64(idx); - addCoeffs(eq, b); - if (eq[pos + offset] > 0) - std::transform(b.begin(), b.end(), b.begin(), std::negate()); - - // Extract the upper bound (in terms of other coeff's + const). - auto expr = - getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context); - expr = expr.floorDiv(std::abs(eq[pos + offset])); - // Upper bound is exclusive. - ubExprs.push_back(expr + 1); - // Lower bound. - expr = - getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context); - expr = expr.ceilDiv(std::abs(eq[pos + offset])); - lbExprs.push_back(expr); - } - - auto lbMap = AffineMap::get(dimCount, symCount, lbExprs, context); - auto ubMap = AffineMap::get(dimCount, symCount, ubExprs, context); - - return {lbMap, ubMap}; -} - -/// Computes the lower and upper bounds of the first 'num' dimensional -/// variables (starting at 'offset') as affine maps of the remaining -/// variables (dimensional and symbolic variables). Local variables are -/// themselves explicitly computed as affine functions of other variables in -/// this process if needed. 
-void FlatAffineValueConstraints::getSliceBounds( - unsigned offset, unsigned num, MLIRContext *context, - SmallVectorImpl *lbMaps, SmallVectorImpl *ubMaps, - bool closedUB) { - assert(num < getNumDimVars() && "invalid range"); - - // Basic simplification. - normalizeConstraintsByGCD(); - - LLVM_DEBUG(llvm::dbgs() << "getSliceBounds for first " << num - << " variables\n"); - LLVM_DEBUG(dump()); - - // Record computed/detected variables. - SmallVector memo(getNumVars()); - // Initialize dimensional and symbolic variables. - for (unsigned i = 0, e = getNumDimVars(); i < e; i++) { - if (i < offset) - memo[i] = getAffineDimExpr(i, context); - else if (i >= offset + num) - memo[i] = getAffineDimExpr(i - num, context); - } - for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) - memo[i] = getAffineSymbolExpr(i - getNumDimVars(), context); - - bool changed; - do { - changed = false; - // Identify yet unknown variables as constants or mod's / floordiv's of - // other variables if possible. - for (unsigned pos = 0; pos < getNumVars(); pos++) { - if (memo[pos]) - continue; - - auto lbConst = getConstantBound64(BoundType::LB, pos); - auto ubConst = getConstantBound64(BoundType::UB, pos); - if (lbConst.has_value() && ubConst.has_value()) { - // Detect equality to a constant. - if (*lbConst == *ubConst) { - memo[pos] = getAffineConstantExpr(*lbConst, context); - changed = true; - continue; - } - - // Detect an variable as modulo of another variable w.r.t a - // constant. - if (detectAsMod(*this, pos, *lbConst, *ubConst, memo, context)) { - changed = true; - continue; - } - } - - // Detect an variable as a floordiv of an affine function of other - // variables (divisor is a positive constant). - if (detectAsFloorDiv(*this, pos, context, memo)) { - changed = true; - continue; - } - - // Detect an variable as an expression of other variables. - unsigned idx; - if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { - continue; - } - - // Build AffineExpr solving for variable 'pos' in terms of all others. - auto expr = getAffineConstantExpr(0, context); - unsigned j, e; - for (j = 0, e = getNumVars(); j < e; ++j) { - if (j == pos) - continue; - int64_t c = atEq64(idx, j); - if (c == 0) - continue; - // If any of the involved IDs hasn't been found yet, we can't proceed. - if (!memo[j]) - break; - expr = expr + memo[j] * c; - } - if (j < e) - // Can't construct expression as it depends on a yet uncomputed - // variable. - continue; - - // Add constant term to AffineExpr. - expr = expr + atEq64(idx, getNumVars()); - int64_t vPos = atEq64(idx, pos); - assert(vPos != 0 && "expected non-zero here"); - if (vPos > 0) - expr = (-expr).floorDiv(vPos); - else - // vPos < 0. - expr = expr.floorDiv(-vPos); - // Successfully constructed expression. - memo[pos] = expr; - changed = true; - } - // This loop is guaranteed to reach a fixed point - since once an - // variable's explicit form is computed (in memo[pos]), it's not updated - // again. - } while (changed); - - int64_t ubAdjustment = closedUB ? 0 : 1; - - // Set the lower and upper bound maps for all the variables that were - // computed as affine expressions of the rest as the "detected expr" and - // "detected expr + 1" respectively; set the undetected ones to null. 
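// For example, if the detection loop above computed memo[pos] = 16*d0 + d1
// for a slice variable and closedUB is false, the resulting bounds are
//   lbMap: (d0, d1) -> (16*d0 + d1)
//   ubMap: (d0, d1) -> (16*d0 + d1 + 1)
// expressed over the remaining dimensional/symbolic variables.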
- std::optional tmpClone; - for (unsigned pos = 0; pos < num; pos++) { - unsigned numMapDims = getNumDimVars() - num; - unsigned numMapSymbols = getNumSymbolVars(); - AffineExpr expr = memo[pos + offset]; - if (expr) - expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols); - - AffineMap &lbMap = (*lbMaps)[pos]; - AffineMap &ubMap = (*ubMaps)[pos]; - - if (expr) { - lbMap = AffineMap::get(numMapDims, numMapSymbols, expr); - ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment); - } else { - // TODO: Whenever there are local variables in the dependence - // constraints, we'll conservatively over-approximate, since we don't - // always explicitly compute them above (in the while loop). - if (getNumLocalVars() == 0) { - // Work on a copy so that we don't update this constraint system. - if (!tmpClone) { - tmpClone.emplace(FlatAffineValueConstraints(*this)); - // Removing redundant inequalities is necessary so that we don't get - // redundant loop bounds. - tmpClone->removeRedundantInequalities(); - } - std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound( - pos, offset, num, getNumDimVars(), /*localExprs=*/{}, context, - closedUB); - } - - // If the above fails, we'll just use the constant lower bound and the - // constant upper bound (if they exist) as the slice bounds. - // TODO: being conservative for the moment in cases that - // lead to multiple bounds - until getConstDifference in LoopFusion.cpp is - // fixed (b/126426796). - if (!lbMap || lbMap.getNumResults() > 1) { - LLVM_DEBUG(llvm::dbgs() - << "WARNING: Potentially over-approximating slice lb\n"); - auto lbConst = getConstantBound64(BoundType::LB, pos + offset); - if (lbConst.has_value()) { - lbMap = AffineMap::get(numMapDims, numMapSymbols, - getAffineConstantExpr(*lbConst, context)); - } - } - if (!ubMap || ubMap.getNumResults() > 1) { - LLVM_DEBUG(llvm::dbgs() - << "WARNING: Potentially over-approximating slice ub\n"); - auto ubConst = getConstantBound64(BoundType::UB, pos + offset); - if (ubConst.has_value()) { - ubMap = AffineMap::get( - numMapDims, numMapSymbols, - getAffineConstantExpr(*ubConst + ubAdjustment, context)); - } - } - } - LLVM_DEBUG(llvm::dbgs() - << "lb map for pos = " << Twine(pos + offset) << ", expr: "); - LLVM_DEBUG(lbMap.dump();); - LLVM_DEBUG(llvm::dbgs() - << "ub map for pos = " << Twine(pos + offset) << ", expr: "); - LLVM_DEBUG(ubMap.dump();); - } -} - -LogicalResult FlatAffineValueConstraints::flattenAlignedMapAndMergeLocals( - AffineMap map, std::vector> *flattenedExprs) { - FlatAffineValueConstraints localCst; - if (failed(getFlattenedAffineExprs(map, flattenedExprs, &localCst))) { - LLVM_DEBUG(llvm::dbgs() - << "composition unimplemented for semi-affine maps\n"); - return failure(); - } - - // Add localCst information. - if (localCst.getNumLocalVars() > 0) { - unsigned numLocalVars = getNumLocalVars(); - // Insert local dims of localCst at the beginning. - insertLocalVar(/*pos=*/0, /*num=*/localCst.getNumLocalVars()); - // Insert local dims of `this` at the end of localCst. - localCst.appendLocalVar(/*num=*/numLocalVars); - // Dimensions of localCst and this constraint set match. Append localCst to - // this constraint set. 
- append(localCst); - } - - return success(); -} - -LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos, - AffineMap boundMap, - bool isClosedBound) { - assert(boundMap.getNumDims() == getNumDimVars() && "dim mismatch"); - assert(boundMap.getNumSymbols() == getNumSymbolVars() && "symbol mismatch"); - assert(pos < getNumDimAndSymbolVars() && "invalid position"); - assert((type != BoundType::EQ || isClosedBound) && - "EQ bound must be closed."); - - // Equality follows the logic of lower bound except that we add an equality - // instead of an inequality. - assert((type != BoundType::EQ || boundMap.getNumResults() == 1) && - "single result expected"); - bool lower = type == BoundType::LB || type == BoundType::EQ; - - std::vector> flatExprs; - if (failed(flattenAlignedMapAndMergeLocals(boundMap, &flatExprs))) - return failure(); - assert(flatExprs.size() == boundMap.getNumResults()); - - // Add one (in)equality for each result. - for (const auto &flatExpr : flatExprs) { - SmallVector ineq(getNumCols(), 0); - // Dims and symbols. - for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) { - ineq[j] = lower ? -flatExpr[j] : flatExpr[j]; - } - // Invalid bound: pos appears in `boundMap`. - // TODO: This should be an assertion. Fix `addDomainFromSliceMaps` and/or - // its callers to prevent invalid bounds from being added. - if (ineq[pos] != 0) - continue; - ineq[pos] = lower ? 1 : -1; - // Local columns of `ineq` are at the beginning. - unsigned j = getNumDimVars() + getNumSymbolVars(); - unsigned end = flatExpr.size() - 1; - for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) { - ineq[j] = lower ? -flatExpr[i] : flatExpr[i]; - } - // Make the bound closed in if flatExpr is open. The inequality is always - // created in the upper bound form, so the adjustment is -1. - int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1; - // Constant term. - ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1] - : flatExpr[flatExpr.size() - 1]) + - boundAdjustment; - type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq); - } - - return success(); -} - -LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos, - AffineMap boundMap) { - return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB); -} - -AffineMap -FlatAffineValueConstraints::computeAlignedMap(AffineMap map, - ValueRange operands) const { - assert(map.getNumInputs() == operands.size() && "number of inputs mismatch"); - - SmallVector dims, syms; -#ifndef NDEBUG - SmallVector newSyms; - SmallVector *newSymsPtr = &newSyms; -#else - SmallVector *newSymsPtr = nullptr; -#endif // NDEBUG - - dims.reserve(getNumDimVars()); - syms.reserve(getNumSymbolVars()); - for (unsigned i = getVarKindOffset(VarKind::SetDim), - e = getVarKindEnd(VarKind::SetDim); - i < e; ++i) - dims.push_back(values[i] ? *values[i] : Value()); - for (unsigned i = getVarKindOffset(VarKind::Symbol), - e = getVarKindEnd(VarKind::Symbol); - i < e; ++i) - syms.push_back(values[i] ? *values[i] : Value()); - - AffineMap alignedMap = - alignAffineMapWithValues(map, operands, dims, syms, newSymsPtr); - // All symbols are already part of this FlatAffineConstraints. 
- assert(syms.size() == newSymsPtr->size() && "unexpected new/missing symbols"); - assert(std::equal(syms.begin(), syms.end(), newSymsPtr->begin()) && - "unexpected new/missing symbols"); - return alignedMap; -} - LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos, AffineMap boundMap, ValueRange boundOperands) { @@ -1329,149 +278,34 @@ LogicalResult FlatAffineValueConstraints::addSliceBounds( return success(); } -bool FlatAffineValueConstraints::findVar(Value val, unsigned *pos) const { - unsigned i = 0; - for (const auto &mayBeVar : values) { - if (mayBeVar && *mayBeVar == val) { - *pos = i; - return true; - } - i++; - } - return false; -} - -bool FlatAffineValueConstraints::containsVar(Value val) const { - return llvm::any_of(values, [&](const std::optional &mayBeVar) { - return mayBeVar && *mayBeVar == val; - }); -} - -void FlatAffineValueConstraints::swapVar(unsigned posA, unsigned posB) { - IntegerPolyhedron::swapVar(posA, posB); - - if (getVarKindAt(posA) == VarKind::Local && - getVarKindAt(posB) == VarKind::Local) - return; - - // Treat value of a local variable as std::nullopt. - if (getVarKindAt(posA) == VarKind::Local) - values[posB] = std::nullopt; - else if (getVarKindAt(posB) == VarKind::Local) - values[posA] = std::nullopt; - else - std::swap(values[posA], values[posB]); +LogicalResult +FlatAffineValueConstraints::composeMap(const AffineValueMap *vMap) { + return composeMatchingMap( + computeAlignedMap(vMap->getAffineMap(), vMap->getOperands())); } -void FlatAffineValueConstraints::addBound(BoundType type, Value val, - int64_t value) { +// Turn a symbol into a dimension. +static void turnSymbolIntoDim(FlatAffineValueConstraints *cst, Value value) { unsigned pos; - if (!findVar(val, &pos)) - // This is a pre-condition for this method. - assert(0 && "var not found"); - addBound(type, pos, value); -} - -void FlatAffineValueConstraints::printSpace(raw_ostream &os) const { - IntegerPolyhedron::printSpace(os); - os << "("; - for (unsigned i = 0, e = getNumDimAndSymbolVars(); i < e; i++) { - if (hasValue(i)) - os << "Value\t"; - else - os << "None\t"; + if (cst->findVar(value, &pos) && pos >= cst->getNumDimVars() && + pos < cst->getNumDimAndSymbolVars()) { + cst->swapVar(pos, cst->getNumDimVars()); + cst->setDimSymbolSeparation(cst->getNumSymbolVars() - 1); } - for (unsigned i = getVarKindOffset(VarKind::Local), - e = getVarKindEnd(VarKind::Local); - i < e; ++i) - os << "Local\t"; - os << "const)\n"; } -void FlatAffineValueConstraints::clearAndCopyFrom( - const IntegerRelation &other) { - - if (auto *otherValueSet = - dyn_cast(&other)) { - *this = *otherValueSet; - } else { - *static_cast(this) = other; - values.clear(); - values.resize(getNumDimAndSymbolVars(), std::nullopt); +// Changes all symbol variables which are loop IVs to dim variables. +void FlatAffineValueConstraints::convertLoopIVSymbolsToDims() { + // Gather all symbols which are loop IVs. + SmallVector loopIVs; + for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) { + if (hasValue(i) && getForInductionVarOwner(getValue(i))) + loopIVs.push_back(getValue(i)); } -} - -void FlatAffineValueConstraints::fourierMotzkinEliminate( - unsigned pos, bool darkShadow, bool *isResultIntegerExact) { - SmallVector, 8> newVals = values; - if (getVarKindAt(pos) != VarKind::Local) - newVals.erase(newVals.begin() + pos); - // Note: Base implementation discards all associated Values. 
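// As a reminder of what the base elimination does: projecting out j from
//   i - j >= 0  and  j - 2 >= 0
// combines the two constraints into i - 2 >= 0. The wrapper here only
// re-attaches the SSA Values that the base implementation drops.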
- IntegerPolyhedron::fourierMotzkinEliminate(pos, darkShadow, - isResultIntegerExact); - values = newVals; - assert(values.size() == getNumDimAndSymbolVars()); -} - -void FlatAffineValueConstraints::projectOut(Value val) { - unsigned pos; - bool ret = findVar(val, &pos); - assert(ret); - (void)ret; - fourierMotzkinEliminate(pos); -} - -LogicalResult FlatAffineValueConstraints::unionBoundingBox( - const FlatAffineValueConstraints &otherCst) { - assert(otherCst.getNumDimVars() == getNumDimVars() && "dims mismatch"); - assert(otherCst.getMaybeValues() - .slice(0, getNumDimVars()) - .equals(getMaybeValues().slice(0, getNumDimVars())) && - "dim values mismatch"); - assert(otherCst.getNumLocalVars() == 0 && "local vars not supported here"); - assert(getNumLocalVars() == 0 && "local vars not supported yet here"); - - // Align `other` to this. - if (!areVarsAligned(*this, otherCst)) { - FlatAffineValueConstraints otherCopy(otherCst); - mergeAndAlignVars(/*offset=*/getNumDimVars(), this, &otherCopy); - return IntegerPolyhedron::unionBoundingBox(otherCopy); + // Turn each symbol in 'loopIVs' into a dim variable. + for (auto iv : loopIVs) { + turnSymbolIntoDim(this, iv); } - - return IntegerPolyhedron::unionBoundingBox(otherCst); -} - -/// Compute an explicit representation for local vars. For all systems coming -/// from MLIR integer sets, maps, or expressions where local vars were -/// introduced to model floordivs and mods, this always succeeds. -static LogicalResult computeLocalVars(const FlatAffineValueConstraints &cst, - SmallVectorImpl &memo, - MLIRContext *context) { - unsigned numDims = cst.getNumDimVars(); - unsigned numSyms = cst.getNumSymbolVars(); - - // Initialize dimensional and symbolic variables. - for (unsigned i = 0; i < numDims; i++) - memo[i] = getAffineDimExpr(i, context); - for (unsigned i = numDims, e = numDims + numSyms; i < e; i++) - memo[i] = getAffineSymbolExpr(i - numDims, context); - - bool changed; - do { - // Each time `changed` is true at the end of this iteration, one or more - // local vars would have been detected as floordivs and set in memo; so the - // number of null entries in memo[...] strictly reduces; so this converges. - changed = false; - for (unsigned i = 0, e = cst.getNumLocalVars(); i < e; ++i) - if (!memo[numDims + numSyms + i] && - detectAsFloorDiv(cst, /*pos=*/numDims + numSyms + i, context, memo)) - changed = true; - } while (changed); - - ArrayRef localExprs = - ArrayRef(memo).take_back(cst.getNumLocalVars()); - return success( - llvm::all_of(localExprs, [](AffineExpr expr) { return expr; })); } void FlatAffineValueConstraints::getIneqAsAffineValueMap( @@ -1485,7 +319,7 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( // Get expressions for local vars. SmallVector memo(getNumVars(), AffineExpr()); - if (failed(computeLocalVars(*this, memo, context))) + if (failed(computeLocalVars(memo, context))) assert(false && "one or more local exprs do not have an explicit representation"); auto localExprs = ArrayRef(memo).take_back(getNumLocalVars()); @@ -1519,105 +353,6 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -IntegerSet -FlatAffineValueConstraints::getAsIntegerSet(MLIRContext *context) const { - if (getNumConstraints() == 0) - // Return universal set (always true): 0 == 0. - return IntegerSet::get(getNumDimVars(), getNumSymbolVars(), - getAffineConstantExpr(/*constant=*/0, context), - /*eqFlags=*/true); - - // Construct local references. 
- SmallVector memo(getNumVars(), AffineExpr()); - - if (failed(computeLocalVars(*this, memo, context))) { - // Check if the local variables without an explicit representation have - // zero coefficients everywhere. - SmallVector noLocalRepVars; - unsigned numDimsSymbols = getNumDimAndSymbolVars(); - for (unsigned i = numDimsSymbols, e = getNumVars(); i < e; ++i) { - if (!memo[i] && !isColZero(/*pos=*/i)) - noLocalRepVars.push_back(i - numDimsSymbols); - } - if (!noLocalRepVars.empty()) { - LLVM_DEBUG({ - llvm::dbgs() << "local variables at position(s) "; - llvm::interleaveComma(noLocalRepVars, llvm::dbgs()); - llvm::dbgs() << " do not have an explicit representation in:\n"; - this->dump(); - }); - return IntegerSet(); - } - } - - ArrayRef localExprs = - ArrayRef(memo).take_back(getNumLocalVars()); - - // Construct the IntegerSet from the equalities/inequalities. - unsigned numDims = getNumDimVars(); - unsigned numSyms = getNumSymbolVars(); - - SmallVector eqFlags(getNumConstraints()); - std::fill(eqFlags.begin(), eqFlags.begin() + getNumEqualities(), true); - std::fill(eqFlags.begin() + getNumEqualities(), eqFlags.end(), false); - - SmallVector exprs; - exprs.reserve(getNumConstraints()); - - for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) - exprs.push_back(getAffineExprFromFlatForm(getEquality64(i), numDims, - numSyms, localExprs, context)); - for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) - exprs.push_back(getAffineExprFromFlatForm(getInequality64(i), numDims, - numSyms, localExprs, context)); - return IntegerSet::get(numDims, numSyms, exprs, eqFlags); -} - -AffineMap mlir::alignAffineMapWithValues(AffineMap map, ValueRange operands, - ValueRange dims, ValueRange syms, - SmallVector *newSyms) { - assert(operands.size() == map.getNumInputs() && - "expected same number of operands and map inputs"); - MLIRContext *ctx = map.getContext(); - Builder builder(ctx); - SmallVector dimReplacements(map.getNumDims(), {}); - unsigned numSymbols = syms.size(); - SmallVector symReplacements(map.getNumSymbols(), {}); - if (newSyms) { - newSyms->clear(); - newSyms->append(syms.begin(), syms.end()); - } - - for (const auto &operand : llvm::enumerate(operands)) { - // Compute replacement dim/sym of operand. - AffineExpr replacement; - auto dimIt = std::find(dims.begin(), dims.end(), operand.value()); - auto symIt = std::find(syms.begin(), syms.end(), operand.value()); - if (dimIt != dims.end()) { - replacement = - builder.getAffineDimExpr(std::distance(dims.begin(), dimIt)); - } else if (symIt != syms.end()) { - replacement = - builder.getAffineSymbolExpr(std::distance(syms.begin(), symIt)); - } else { - // This operand is neither a dimension nor a symbol. Add it as a new - // symbol. - replacement = builder.getAffineSymbolExpr(numSymbols++); - if (newSyms) - newSyms->push_back(operand.value()); - } - // Add to corresponding replacements vector. - if (operand.index() < map.getNumDims()) { - dimReplacements[operand.index()] = replacement; - } else { - symReplacements[operand.index() - map.getNumDims()] = replacement; - } - } - - return map.replaceDimsAndSymbols(dimReplacements, symReplacements, - dims.size(), numSymbols); -} - FlatAffineValueConstraints FlatAffineRelation::getDomainSet() const { FlatAffineValueConstraints domain = *this; // Convert all range variables to local variables. 
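A minimal standalone sketch (plain C++, not the MLIR API) of the operand-alignment idea used by alignAffineMapWithValues above: each map operand is replaced by the dim position it matches, the symbol position it matches, or a fresh trailing symbol otherwise. The names and container types here are illustrative only.

#include <algorithm>
#include <vector>

struct Replacement {
  bool isDim;   // true -> dim expression, false -> symbol expression
  unsigned pos; // position of the dim/symbol the operand maps to
};

// Classify each map operand as an existing dim, an existing symbol, or a new
// symbol appended after the known ones (mirroring the newSyms handling above).
std::vector<Replacement> alignOperands(const std::vector<int> &operands,
                                       const std::vector<int> &dims,
                                       std::vector<int> &syms) {
  std::vector<Replacement> repl;
  for (int op : operands) {
    if (auto it = std::find(dims.begin(), dims.end(), op); it != dims.end()) {
      repl.push_back({true, unsigned(it - dims.begin())});
    } else if (auto it = std::find(syms.begin(), syms.end(), op);
               it != syms.end()) {
      repl.push_back({false, unsigned(it - syms.begin())});
    } else {
      syms.push_back(op); // unknown operand becomes a trailing symbol
      repl.push_back({false, unsigned(syms.size() - 1)});
    }
  }
  return repl;
}

With dims = {10, 11}, syms = {20}, and operands = {11, 20, 30}, this yields dim 1, symbol 0, and a new symbol 1 (30 is appended to syms), matching how the removed helper fills dimReplacements and symReplacements.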
@@ -1806,31 +541,3 @@ LogicalResult mlir::getRelationFromMap(const AffineValueMap &map, return success(); } - -LogicalResult -mlir::getMultiAffineFunctionFromMap(AffineMap map, - MultiAffineFunction &multiAff) { - FlatAffineValueConstraints cst; - std::vector> flattenedExprs; - LogicalResult result = getFlattenedAffineExprs(map, &flattenedExprs, &cst); - - if (result.failed()) - return failure(); - - DivisionRepr divs = cst.getLocalReprs(); - assert(divs.hasAllReprs() && - "AffineMap cannot produce divs without local representation"); - - // TODO: We shouldn't have to do this conversion. - Matrix mat(map.getNumResults(), map.getNumInputs() + divs.getNumDivs() + 1); - for (unsigned i = 0, e = flattenedExprs.size(); i < e; ++i) - for (unsigned j = 0, f = flattenedExprs[i].size(); j < f; ++j) - mat(i, j) = flattenedExprs[i][j]; - - multiAff = MultiAffineFunction( - PresburgerSpace::getRelationSpace(map.getNumDims(), map.getNumResults(), - map.getNumSymbols(), divs.getNumDivs()), - mat, divs); - - return success(); -} diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp index db4fa354d4c2d..41a739d726ed5 100644 --- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp @@ -98,7 +98,7 @@ ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) { if (isValidSymbol(value)) { // Check if the symbol is a constant. if (auto cOp = value.getDefiningOp()) - cst->addBound(FlatAffineValueConstraints::EQ, value, cOp.value()); + cst->addBound(BoundType::EQ, value, cOp.value()); } else if (auto loop = getForInductionVarOwner(value)) { if (failed(cst->addAffineForOpDomain(loop))) return failure(); @@ -357,11 +357,11 @@ std::optional MemRefRegion::getConstantBoundingSizeAndShape( // that will need non-trivials means to eliminate. FlatAffineValueConstraints cstWithShapeBounds(cst); for (unsigned r = 0; r < rank; r++) { - cstWithShapeBounds.addBound(FlatAffineValueConstraints::LB, r, 0); + cstWithShapeBounds.addBound(BoundType::LB, r, 0); int64_t dimSize = memRefType.getDimSize(r); if (ShapedType::isDynamic(dimSize)) continue; - cstWithShapeBounds.addBound(FlatAffineValueConstraints::UB, r, dimSize - 1); + cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1); } // Find a constant upper bound on the extent of this memref region along each @@ -516,7 +516,7 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, // Check if the symbol is a constant. Value symbol = operand; if (auto constVal = getConstantIntValue(symbol)) - cst.addBound(FlatAffineValueConstraints::EQ, symbol, constVal.value()); + cst.addBound(BoundType::EQ, symbol, constVal.value()); } else { LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value"); return failure(); @@ -580,11 +580,10 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, if (addMemRefDimBounds) { auto memRefType = memref.getType().cast(); for (unsigned r = 0; r < rank; r++) { - cst.addBound(FlatAffineValueConstraints::LB, /*pos=*/r, /*value=*/0); + cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0); if (memRefType.isDynamicDim(r)) continue; - cst.addBound(FlatAffineValueConstraints::UB, /*pos=*/r, - memRefType.getDimSize(r) - 1); + cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1); } } cst.removeTrivialRedundancy(); @@ -695,7 +694,7 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, continue; // Check for overflow: d_i >= memref dim size. 
- ucst.addBound(FlatAffineValueConstraints::LB, r, dimSize); + ucst.addBound(BoundType::LB, r, dimSize); outOfBounds = !ucst.isEmpty(); if (outOfBounds && emitError) { loadOrStoreOp.emitOpError() @@ -706,7 +705,7 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, FlatAffineValueConstraints lcst(*region.getConstraints()); std::fill(ineq.begin(), ineq.end(), 0); // d_i <= -1; - lcst.addBound(FlatAffineValueConstraints::UB, r, -1); + lcst.addBound(BoundType::UB, r, -1); outOfBounds = !lcst.isEmpty(); if (outOfBounds && emitError) { loadOrStoreOp.emitOpError() @@ -1403,9 +1402,8 @@ static void unpackOptionalValues(ArrayRef> source, /// Note: This function adds a new symbol column to the `constraints` for each /// dimension/symbol that exists in the affine map but not in `constraints`. static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, - IntegerPolyhedron::BoundType type, - unsigned pos, AffineMap map, - ValueRange operands) { + BoundType type, unsigned pos, + AffineMap map, ValueRange operands) { SmallVector dims, syms, newSyms; unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims); unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms); @@ -1482,7 +1480,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Add an inequality for each result expr_i of map: // isMin: op <= expr_i, !isMin: op >= expr_i - auto boundType = isMin ? IntegerPolyhedron::UB : IntegerPolyhedron::LB; + auto boundType = isMin ? BoundType::UB : BoundType::LB; // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.) AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map; if (failed( @@ -1504,8 +1502,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Add an equality: Set dimOpBound to computed bound. // Add back dimension for op. (Was removed by `getSliceBounds`.) AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp); - if (failed(constraints.addBound(IntegerPolyhedron::EQ, dimOpBound, - alignedBoundMap))) + if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap))) return failure(); // If the constraint system is empty, there is an inconsistency. (E.g., this @@ -1530,7 +1527,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Note: These equalities could have been added earlier and used to express // minOp <= expr_i. However, then we run the risk that `getSliceBounds` // computes minOpUb in terms of r_i dims, which is not desired. - if (failed(alignAndAddBound(newConstr, IntegerPolyhedron::EQ, i, + if (failed(alignAndAddBound(newConstr, BoundType::EQ, i, map.getSubMap({i - resultDimStart}), operands))) return failure(); @@ -1557,7 +1554,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Skip unused operands and operands that are already constants. if (!newOperands[i] || getConstantIntValue(newOperands[i])) continue; - if (auto bound = constraints.getConstantBound64(IntegerPolyhedron::EQ, i)) { + if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) { AffineExpr expr = i < newMap.getNumDims() ? 
builder.getAffineDimExpr(i) diff --git a/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp b/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp index 99dfaa9dee1d2..999adfad2ab5b 100644 --- a/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp +++ b/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp @@ -97,11 +97,9 @@ SimplifyBoundedAffineOpsOp::apply(TransformResults &results, unsigned pos; if (!cstr.findVar(std::get<0>(it), &pos)) pos = cstr.appendSymbolVar(std::get<0>(it)); - cstr.addBound(FlatAffineValueConstraints::BoundType::LB, pos, - std::get<1>(it)); + cstr.addBound(presburger::BoundType::LB, pos, std::get<1>(it)); // Note: addBound bounds are inclusive, but specified UB is exclusive. - cstr.addBound(FlatAffineValueConstraints::BoundType::UB, pos, - std::get<2>(it) - 1); + cstr.addBound(presburger::BoundType::UB, pos, std::get<2>(it) - 1); } // Transform all targets. diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 38d660d4ff90b..a7f96dc0e08e2 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2057,6 +2057,8 @@ static LogicalResult generateCopy( OpBuilder topBuilder(f.getBody()); Value zeroIndex = topBuilder.create(f.getLoc(), 0); + *sizeInBytes = 0; + if (begin == end) return success(); @@ -2105,7 +2107,6 @@ static LogicalResult generateCopy( if (*numElements == 0) { LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n"); - *sizeInBytes = 0; return success(); } @@ -2183,8 +2184,7 @@ static LogicalResult generateCopy( // fastMemRefType is a constant shaped memref. auto maySizeInBytes = getIntOrFloatMemRefSizeInBytes(fastMemRefType); // We don't account for things of unknown size. - if (!maySizeInBytes) - maySizeInBytes = 0; + *sizeInBytes = maySizeInBytes ? *maySizeInBytes : 0; LLVM_DEBUG(emitRemarkForBlock(*block) << "Creating fast buffer of type " << fastMemRefType @@ -2193,7 +2193,6 @@ static LogicalResult generateCopy( } else { // Reuse the one already created. 
fastMemRef = fastBufferMap[memref]; - *sizeInBytes = 0; } auto numElementsSSA = top.create(loc, *numElements); @@ -2372,8 +2371,8 @@ static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs, for (unsigned d = 0; d < rank; d++) { auto dimSize = memRefType.getDimSize(d); assert(dimSize > 0 && "filtered dynamic shapes above"); - regionCst->addBound(IntegerPolyhedron::LB, d, 0); - regionCst->addBound(IntegerPolyhedron::UB, d, dimSize - 1); + regionCst->addBound(BoundType::LB, d, 0); + regionCst->addBound(BoundType::UB, d, dimSize - 1); } return true; } @@ -2554,13 +2553,13 @@ LogicalResult mlir::affineDataCopyGenerate(Block::iterator begin, if (llvm::DebugFlag && (forOp = dyn_cast(&*begin))) { LLVM_DEBUG(forOp.emitRemark() << llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024) - << " KiB of copy buffers in fast memory space for this block\n"); + << " KiB of copy buffers in fast memory space for this block"); } if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) { - StringRef str = "Total size of all copy buffers' for this block " - "exceeds fast memory capacity\n"; - block->getParentOp()->emitWarning(str); + block->getParentOp()->emitWarning( + "total size of all copy buffers' for this block exceeds fast memory " + "capacity"); } return success(); diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 50405953e05bd..d96b688d29ed5 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -1800,8 +1800,8 @@ MemRefType mlir::normalizeMemRefType(MemRefType memrefType, for (unsigned d = 0; d < rank; ++d) { // Use constraint system only in static dimensions. if (shape[d] > 0) { - fac.addBound(IntegerPolyhedron::LB, d, 0); - fac.addBound(IntegerPolyhedron::UB, d, shape[d] - 1); + fac.addBound(BoundType::LB, d, 0); + fac.addBound(BoundType::UB, d, shape[d] - 1); } else { memrefTypeDynDims.emplace_back(d); } @@ -1824,8 +1824,7 @@ MemRefType mlir::normalizeMemRefType(MemRefType memrefType, newShape[d] = ShapedType::kDynamic; } else { // The lower bound for the shape is always zero. - std::optional ubConst = - fac.getConstantBound64(IntegerPolyhedron::UB, d); + std::optional ubConst = fac.getConstantBound64(BoundType::UB, d); // For a static memref and an affine map with no symbols, this is // always bounded. However, when we have symbols, we may not be able to // obtain a constant upper bound. 
Also, mapping to a negative space is diff --git a/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp b/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp index c506239744c48..f53edcefe3c79 100644 --- a/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp @@ -8,6 +8,8 @@ #include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/IR/PatternMatch.h" using namespace mlir; @@ -74,3 +76,33 @@ LogicalResult mlir::mergeOffsetsSizesAndStrides( droppedProducerDims, consumerOffsets, consumerSizes, consumerStrides, combinedOffsets, combinedSizes, combinedStrides); } + +void mlir::resolveSourceIndicesOffsetsAndStrides( + RewriterBase &rewriter, Location loc, ArrayRef mixedOffsets, + ArrayRef mixedStrides, + const llvm::SmallBitVector &rankReducedDims, ValueRange indicesVals, + SmallVectorImpl &sourceIndices) { + OpFoldResult zero = rewriter.getIndexAttr(0); + + // For each dimension that is rank-reduced, add a zero to the indices. + int64_t indicesDim = 0; + SmallVector indices; + for (auto dim : llvm::seq(0, mixedOffsets.size())) { + OpFoldResult ofr = + (rankReducedDims.test(dim)) ? zero : indicesVals[indicesDim++]; + indices.push_back(ofr); + } + + sourceIndices.resize(indices.size()); + sourceIndices.clear(); + for (auto [offset, index, stride] : + llvm::zip_equal(mixedOffsets, indices, mixedStrides)) { + AffineExpr off, idx, str; + bindSymbols(rewriter.getContext(), off, idx, str); + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, AffineMap::get(0, 3, off + idx * str), + {offset, index, stride}); + sourceIndices.push_back( + getValueOrCreateConstantIndexOp(rewriter, loc, ofr)); + } +} diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp index db3ddab483b5a..96a58459a37b9 100644 --- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp @@ -13,8 +13,10 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include @@ -41,7 +43,7 @@ static std::pair getHalves(const APInt &value, return {std::move(low), std::move(high)}; } -/// Returns the type with the last (innermost) dimention reduced to x1. +/// Returns the type with the last (innermost) dimension reduced to x1. /// Scalarizes 1D vector inputs to match how we extract/insert vector values, /// e.g.: /// - vector<3x2xi16> --> vector<3x1xi16> @@ -126,7 +128,7 @@ static Value dropTrailingX1Dim(ConversionPatternRewriter &rewriter, if (!vecTy) return input; - // Shape cast to drop the last x1 dimention. + // Shape cast to drop the last x1 dimension. ArrayRef shape = vecTy.getShape(); assert(shape.size() >= 2 && "Expected vector with at list two dims"); assert(shape.back() == 1 && "Expected the last vector dim to be x1"); @@ -175,13 +177,13 @@ static Value insertLastDimSlice(ConversionPatternRewriter &rewriter, /// dimension. 
/// When all `resultComponents` are scalars, the result type is `vector`; /// when `resultComponents` are `vector<...x1xT>`s, the result type is -/// `vector<...xNxT>`, where `N` is the number of `resultComponenets`. +/// `vector<...xNxT>`, where `N` is the number of `resultComponents`. static Value constructResultVector(ConversionPatternRewriter &rewriter, Location loc, VectorType resultType, ValueRange resultComponents) { llvm::ArrayRef resultShape = resultType.getShape(); (void)resultShape; - assert(!resultShape.empty() && "Result expected to have dimentions"); + assert(!resultShape.empty() && "Result expected to have dimensions"); assert(resultShape.back() == static_cast(resultComponents.size()) && "Wrong number of result components"); @@ -906,6 +908,116 @@ struct ConvertShRSI final : OpConversionPattern { } }; +//===----------------------------------------------------------------------===// +// ConvertSIToFP +//===----------------------------------------------------------------------===// + +struct ConvertSIToFP final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(arith::SIToFPOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + + Value in = op.getIn(); + Type oldTy = in.getType(); + auto newTy = + dyn_cast_or_null(getTypeConverter()->convertType(oldTy)); + if (!newTy) + return rewriter.notifyMatchFailure( + loc, llvm::formatv("unsupported type: {0}", oldTy)); + + unsigned oldBitWidth = getElementTypeOrSelf(oldTy).getIntOrFloatBitWidth(); + Value zeroCst = createScalarOrSplatConstant(rewriter, loc, oldTy, 0); + Value oneCst = createScalarOrSplatConstant(rewriter, loc, oldTy, 1); + Value allOnesCst = createScalarOrSplatConstant( + rewriter, loc, oldTy, APInt::getAllOnes(oldBitWidth)); + + // To avoid operating on very large unsigned numbers, perform the + // conversion on the absolute value. Then, decide whether to negate the + // result or not based on that sign bit. We assume two's complement and + // implement negation by flipping all bits and adding 1. + // Note that this relies on the other conversion patterns to legalize + // created ops and narrow the bit widths.
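+    // Worked example: for the i64 input -5, isNeg is true, flipping the bits
+    // gives 4 and adding 1 gives 5, so the conversion runs on 5 and the
+    // result is negated to -5.0. The two's complement wraparound also covers
+    // INT64_MIN: ~INT64_MIN + 1 wraps back to INT64_MIN, which as an unsigned
+    // value is 2^63, and -uitofp(2^63) is exactly sitofp(INT64_MIN).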
+ Value isNeg = rewriter.create(loc, arith::CmpIPredicate::slt, + in, zeroCst); + Value bitwiseNeg = rewriter.create(loc, in, allOnesCst); + Value neg = rewriter.create(loc, bitwiseNeg, oneCst); + Value abs = rewriter.create(loc, isNeg, neg, in); + + Value absResult = rewriter.create(loc, op.getType(), abs); + Value negResult = rewriter.create(loc, absResult); + rewriter.replaceOpWithNewOp(op, isNeg, negResult, + absResult); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ConvertUIToFP +//===----------------------------------------------------------------------===// + +struct ConvertUIToFP final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(arith::UIToFPOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + + Type oldTy = op.getIn().getType(); + auto newTy = + dyn_cast_or_null(getTypeConverter()->convertType(oldTy)); + if (!newTy) + return rewriter.notifyMatchFailure( + loc, llvm::formatv("unsupported type: {0}", oldTy)); + unsigned newBitWidth = newTy.getElementTypeBitWidth(); + + auto [low, hi] = extractLastDimHalves(rewriter, loc, adaptor.getIn()); + Value lowInt = dropTrailingX1Dim(rewriter, loc, low); + Value hiInt = dropTrailingX1Dim(rewriter, loc, hi); + Value zeroCst = + createScalarOrSplatConstant(rewriter, loc, hiInt.getType(), 0); + + // The final result has the following form: + // if (hi == 0) return uitofp(low) + // else return uitofp(low) + uitofp(hi) * 2^BW + // + // where `BW` is the bitwidth of the narrowed integer type. We emit a + // select to make it easier to fold-away the `hi` part calculation when it + // is known to be zero. + // + // Note 1: The emulation is precise only for input values that have exact + // integer representation in the result floating point type, and may lead + // to loss of precision otherwise. + // + // Note 2: We do not strictly need the `hi == 0` case, but it makes + // constant folding easier.
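+    // Worked example with newBitWidth = 32: the i64 value 0x1'0000'0003 splits
+    // into hi = 1 and low = 3, so the emulated result is
+    //   uitofp(3) + uitofp(1) * 2^32 = 3.0 + 4294967296.0 = 4294967299.0,
+    // which equals uitofp applied directly to the original 64-bit value.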
+ Value hiEqZero = rewriter.create( + loc, arith::CmpIPredicate::eq, hiInt, zeroCst); + + Type resultTy = op.getType(); + Type resultElemTy = getElementTypeOrSelf(resultTy); + Value lowFp = rewriter.create(loc, resultTy, lowInt); + Value hiFp = rewriter.create(loc, resultTy, hiInt); + + int64_t pow2Int = int64_t(1) << newBitWidth; + Attribute pow2Attr = + rewriter.getFloatAttr(resultElemTy, static_cast(pow2Int)); + if (auto vecTy = dyn_cast(resultTy)) + pow2Attr = SplatElementsAttr::get(vecTy, pow2Attr); + + Value pow2Val = rewriter.create(loc, resultTy, pow2Attr); + + Value hiVal = rewriter.create(loc, hiFp, pow2Val); + Value result = rewriter.create(loc, lowFp, hiVal); + + rewriter.replaceOpWithNewOp(op, hiEqZero, lowFp, result); + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertTruncI //===----------------------------------------------------------------------===// @@ -1080,6 +1192,6 @@ void arith::populateArithWideIntEmulationPatterns( ConvertIndexCastIntToIndex, ConvertIndexCastIntToIndex, ConvertIndexCastIndexToInt, - ConvertIndexCastIndexToInt>( - typeConverter, patterns.getContext()); + ConvertIndexCastIndexToInt, + ConvertSIToFP, ConvertUIToFP>(typeConverter, patterns.getContext()); } diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index ebfe0258e0793..7e631e7ddc802 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIRLLVMDialect IR/FunctionCallUtils.cpp IR/LLVMAttrs.cpp IR/LLVMDialect.cpp + IR/LLVMInlining.cpp IR/LLVMInterfaces.cpp IR/LLVMTypes.cpp IR/LLVMTypeSyntax.cpp diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index ca439ab8cc15e..428f50f674b26 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "LLVMInlining.h" #include "TypeDetail.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h" @@ -22,7 +23,6 @@ #include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Matchers.h" -#include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/TypeSwitch.h" @@ -2777,237 +2777,6 @@ struct LLVMOpAsmDialectInterface : public OpAsmDialectInterface { }; } // namespace -//===----------------------------------------------------------------------===// -// DialectInlinerInterface -//===----------------------------------------------------------------------===// - -/// Check whether the given alloca is an input to a lifetime intrinsic, -/// optionally passing through one or more casts on the way. This is not -/// transitive through block arguments. -static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) { - SmallVector stack(allocaOp->getUsers().begin(), - allocaOp->getUsers().end()); - while (!stack.empty()) { - Operation *op = stack.pop_back_val(); - if (isa(op)) - return true; - if (isa(op)) - stack.append(op->getUsers().begin(), op->getUsers().end()); - } - return false; -} - -/// Move all alloca operations with a constant size in the former entry block of -/// the newly inlined callee into the entry block of the caller, and insert -/// lifetime intrinsics that limit their scope to the inlined blocks. 
-static void moveConstantAllocasToEntryBlock( - iterator_range inlinedBlocks) { - Block *calleeEntryBlock = &(*inlinedBlocks.begin()); - Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin()); - if (calleeEntryBlock == callerEntryBlock) - // Nothing to do. - return; - SmallVector> allocasToMove; - bool shouldInsertLifetimes = false; - // Conservatively only move alloca operations that are part of the entry block - // and do not inspect nested regions, since they may execute conditionally or - // have other unknown semantics. - for (auto allocaOp : calleeEntryBlock->getOps()) { - IntegerAttr arraySize; - if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) - continue; - bool shouldInsertLifetime = - arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp); - shouldInsertLifetimes |= shouldInsertLifetime; - allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime); - } - if (allocasToMove.empty()) - return; - OpBuilder builder(callerEntryBlock, callerEntryBlock->begin()); - for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { - auto newConstant = builder.create( - allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize); - // Insert a lifetime start intrinsic where the alloca was before moving it. - if (shouldInsertLifetime) { - OpBuilder::InsertionGuard insertionGuard(builder); - builder.setInsertionPoint(allocaOp); - builder.create( - allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), - allocaOp.getResult()); - } - allocaOp->moveAfter(newConstant); - allocaOp.getArraySizeMutable().assign(newConstant.getResult()); - } - if (!shouldInsertLifetimes) - return; - // Insert a lifetime end intrinsic before each return in the callee function. - for (Block &block : inlinedBlocks) { - if (!block.getTerminator()->hasTrait()) - continue; - builder.setInsertionPoint(block.getTerminator()); - for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { - if (!shouldInsertLifetime) - continue; - builder.create( - allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), - allocaOp.getResult()); - } - } -} - -static Value handleByValArgument(OpBuilder &builder, Operation *callable, - Value argument, - NamedAttribute byValAttribute) { - auto func = cast(callable); - LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr(); - // If there is no memory effects attribute, assume that the function is - // not read-only. - bool isReadOnly = memoryEffects && - memoryEffects.getArgMem() != ModRefInfo::ModRef && - memoryEffects.getArgMem() != ModRefInfo::Mod; - if (isReadOnly) - return argument; - // Resolve the pointee type and its size. - auto ptrType = cast(argument.getType()); - Type elementType = cast(byValAttribute.getValue()).getValue(); - unsigned int typeSize = - DataLayout(callable->getParentOfType()) - .getTypeSize(elementType); - // Allocate the new value on the stack. - Value one = builder.create( - func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(1)); - Value allocaOp = - builder.create(func.getLoc(), ptrType, elementType, one); - // Copy the pointee to the newly allocated value. 
- Value copySize = builder.create( - func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(typeSize)); - Value isVolatile = builder.create( - func.getLoc(), builder.getI1Type(), builder.getBoolAttr(false)); - builder.create(func.getLoc(), allocaOp, argument, copySize, - isVolatile); - return allocaOp; -} - -namespace { -struct LLVMInlinerInterface : public DialectInlinerInterface { - using DialectInlinerInterface::DialectInlinerInterface; - - bool isLegalToInline(Operation *call, Operation *callable, - bool wouldBeCloned) const final { - if (!wouldBeCloned) - return false; - auto callOp = dyn_cast(call); - auto funcOp = dyn_cast(callable); - if (!callOp || !funcOp) - return false; - if (auto attrs = funcOp.getArgAttrs()) { - for (Attribute attr : *attrs) { - auto attrDict = cast(attr); - for (NamedAttribute attr : attrDict) { - if (attr.getName() == LLVMDialect::getByValAttrName()) - continue; - // TODO: Handle all argument attributes; - return false; - } - } - } - // TODO: Handle result attributes; - if (funcOp.getResAttrs()) - return false; - // TODO: Handle exceptions. - if (funcOp.getPersonality()) - return false; - if (funcOp.getPassthrough()) { - // TODO: Used attributes should not be passthrough. - DenseSet disallowed( - {StringAttr::get(funcOp->getContext(), "noduplicate"), - StringAttr::get(funcOp->getContext(), "noinline"), - StringAttr::get(funcOp->getContext(), "optnone"), - StringAttr::get(funcOp->getContext(), "presplitcoroutine"), - StringAttr::get(funcOp->getContext(), "returns_twice"), - StringAttr::get(funcOp->getContext(), "strictfp")}); - if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) { - auto stringAttr = dyn_cast(attr); - if (!stringAttr) - return false; - return disallowed.contains(stringAttr); - })) - return false; - } - return true; - } - - bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final { - return true; - } - - /// Conservative allowlist of operations supported so far. - bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final { - if (isPure(op)) - return true; - // Some attributes on memory operations require handling during - // inlining. Since this is not yet implemented, refuse to inline memory - // operations that have any of these attributes. - if (auto iface = dyn_cast(op)) - if (iface.getAliasScopesOrNull() || iface.getNoAliasScopesOrNull()) - return false; - if (auto iface = dyn_cast(op)) - if (iface.getAccessGroupsOrNull()) - return false; - return isa(op); - } - - /// Handle the given inlined return by replacing it with a branch. This - /// overload is called when the inlined region has more than one block. - void handleTerminator(Operation *op, Block *newDest) const final { - // Only return needs to be handled here. - auto returnOp = dyn_cast(op); - if (!returnOp) - return; - - // Replace the return with a branch to the dest. - OpBuilder builder(op); - builder.create(op->getLoc(), returnOp.getOperands(), newDest); - op->erase(); - } - - /// Handle the given inlined return by replacing the uses of the call with the - /// operands of the return. This overload is called when the inlined region - /// only contains one block. - void handleTerminator(Operation *op, - ArrayRef valuesToRepl) const final { - // Return will be the only terminator present. - auto returnOp = cast(op); - - // Replace the values directly with the return operands. 
- assert(returnOp.getNumOperands() == valuesToRepl.size()); - for (const auto &[dst, src] : - llvm::zip(valuesToRepl, returnOp.getOperands())) - dst.replaceAllUsesWith(src); - } - - Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable, - Value argument, Type targetType, - DictionaryAttr argumentAttrs) const final { - if (auto attr = argumentAttrs.getNamed(LLVMDialect::getByValAttrName())) - return handleByValArgument(builder, callable, argument, *attr); - return argument; - } - - void processInlinedCallBlocks( - Operation *call, - iterator_range inlinedBlocks) const override { - // Alloca operations with a constant size that were in the entry block of - // the callee should be moved to the entry block of the caller, as this will - // fold into prologue/epilogue code during code generation. - // This is not implemented as a standalone pattern because we need to know - // which newly inlined block was previously the entry block of the callee. - moveConstantAllocasToEntryBlock(inlinedBlocks); - } -}; -} // end anonymous namespace - //===----------------------------------------------------------------------===// // LLVMDialect initialization, type parsing, and registration. //===----------------------------------------------------------------------===// @@ -3037,9 +2806,9 @@ void LLVMDialect::initialize() { // Support unknown operations because not all LLVM operations are registered. allowUnknownOperations(); // clang-format off - addInterfaces(); + addInterfaces(); // clang-format on + detail::addLLVMInlinerInterface(this); } #define GET_OP_CLASSES diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp new file mode 100644 index 0000000000000..23dd22b9cbd03 --- /dev/null +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp @@ -0,0 +1,293 @@ +//===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Logic for inlining LLVM functions and the definition of the +// LLVMInliningInterface. +// +//===----------------------------------------------------------------------===// + +#include "LLVMInlining.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Matchers.h" +#include "mlir/Transforms/InliningUtils.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-inliner" + +using namespace mlir; + +/// Check whether the given alloca is an input to a lifetime intrinsic, +/// optionally passing through one or more casts on the way. This is not +/// transitive through block arguments. +static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) { + SmallVector stack(allocaOp->getUsers().begin(), + allocaOp->getUsers().end()); + while (!stack.empty()) { + Operation *op = stack.pop_back_val(); + if (isa(op)) + return true; + if (isa(op)) + stack.append(op->getUsers().begin(), op->getUsers().end()); + } + return false; +} + +/// Move all alloca operations with a constant size in the former entry block of +/// the newly inlined callee into the entry block of the caller, and insert +/// lifetime intrinsics that limit their scope to the inlined blocks. 
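The effect of this hoisting can be pictured with a source-level analogy (purely illustrative; the buffer size and loop are made up): a constant-size alloca moved to the caller's entry block folds into the function prologue, while the lifetime intrinsics keep its useful life confined to the inlined region.

#include <cstdio>

// Source-level analogy: after inlining, the callee's fixed-size buffer behaves
// like a caller-entry stack slot whose live range is bounded by markers.
static int inlinedCallee(char *buf, int n) {
  for (int i = 0; i < n; ++i)
    buf[i] = static_cast<char>('a' + i % 26);
  return n;
}

int caller(int n) {
  char buf[16];                 // constant-size alloca, now in the entry block
  int total = 0;
  for (int i = 0; i < 4; ++i) {
    // llvm.lifetime.start(buf) would be emitted here ...
    total += inlinedCallee(buf, n);
    // ... and llvm.lifetime.end(buf) here, bounding the slot to these blocks.
  }
  return total;
}

int main() { std::printf("%d\n", caller(8)); }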
+static void moveConstantAllocasToEntryBlock( + iterator_range inlinedBlocks) { + Block *calleeEntryBlock = &(*inlinedBlocks.begin()); + Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin()); + if (calleeEntryBlock == callerEntryBlock) + // Nothing to do. + return; + SmallVector> allocasToMove; + bool shouldInsertLifetimes = false; + // Conservatively only move alloca operations that are part of the entry block + // and do not inspect nested regions, since they may execute conditionally or + // have other unknown semantics. + for (auto allocaOp : calleeEntryBlock->getOps()) { + IntegerAttr arraySize; + if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) + continue; + bool shouldInsertLifetime = + arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp); + shouldInsertLifetimes |= shouldInsertLifetime; + allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime); + } + if (allocasToMove.empty()) + return; + OpBuilder builder(callerEntryBlock, callerEntryBlock->begin()); + for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { + auto newConstant = builder.create( + allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize); + // Insert a lifetime start intrinsic where the alloca was before moving it. + if (shouldInsertLifetime) { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPoint(allocaOp); + builder.create( + allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), + allocaOp.getResult()); + } + allocaOp->moveAfter(newConstant); + allocaOp.getArraySizeMutable().assign(newConstant.getResult()); + } + if (!shouldInsertLifetimes) + return; + // Insert a lifetime end intrinsic before each return in the callee function. + for (Block &block : inlinedBlocks) { + if (!block.getTerminator()->hasTrait()) + continue; + builder.setInsertionPoint(block.getTerminator()); + for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { + if (!shouldInsertLifetime) + continue; + builder.create( + allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), + allocaOp.getResult()); + } + } +} + +static Value handleByValArgument(OpBuilder &builder, Operation *callable, + Value argument, + NamedAttribute byValAttribute) { + auto func = cast(callable); + LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr(); + // If there is no memory effects attribute, assume that the function is + // not read-only. + bool isReadOnly = memoryEffects && + memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef && + memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod; + if (isReadOnly) + return argument; + // Resolve the pointee type and its size. + auto ptrType = cast(argument.getType()); + Type elementType = cast(byValAttribute.getValue()).getValue(); + unsigned int typeSize = + DataLayout(callable->getParentOfType()) + .getTypeSize(elementType); + // Allocate the new value on the stack. + Value one = builder.create( + func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(1)); + Value allocaOp = + builder.create(func.getLoc(), ptrType, elementType, one); + // Copy the pointee to the newly allocated value. 
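As a plain C++ analogy for the copy this helper materializes (the struct and callee body are hypothetical): a byval argument promises the callee its own copy of the pointee, so unless the callee is known to only read its argument memory, inlining has to allocate that copy in the caller and memcpy the original object into it before running the inlined body.

#include <cstdio>
#include <cstring>

struct Payload { int data[4]; };              // hypothetical byval pointee type

// Conceptually the callee: it owns a private copy and may modify it.
static void calleeByVal(Payload arg) { arg.data[0] = 42; }

// What inlining with an explicit byval copy looks like at the source level:
// a stack allocation in the caller plus a memcpy of the pointee, so the
// inlined body mutates the copy, never the caller's original object.
static void callerAfterInlining(Payload *ptr) {
  Payload copy;                               // corresponds to the new alloca
  std::memcpy(&copy, ptr, sizeof(Payload));   // corresponds to llvm.memcpy
  copy.data[0] = 42;                          // inlined body runs on the copy
}

int main() {
  Payload p = {{1, 2, 3, 4}};
  calleeByVal(p);
  callerAfterInlining(&p);
  std::printf("p.data[0] is still %d\n", p.data[0]);   // prints 1
}

The read-only shortcut above skips the copy entirely, because a pointee the callee never writes can safely be shared with the inlined body.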
+ Value copySize = builder.create( + func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(typeSize)); + Value isVolatile = builder.create( + func.getLoc(), builder.getI1Type(), builder.getBoolAttr(false)); + builder.create(func.getLoc(), allocaOp, argument, copySize, + isVolatile); + return allocaOp; +} + +namespace { +struct LLVMInlinerInterface : public DialectInlinerInterface { + using DialectInlinerInterface::DialectInlinerInterface; + + bool isLegalToInline(Operation *call, Operation *callable, + bool wouldBeCloned) const final { + if (!wouldBeCloned) + return false; + auto callOp = dyn_cast(call); + if (!callOp) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: call is not an LLVM::CallOp\n"); + return false; + } + auto funcOp = dyn_cast(callable); + if (!funcOp) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n"); + return false; + } + if (auto attrs = funcOp.getArgAttrs()) { + for (Attribute attr : *attrs) { + auto attrDict = cast(attr); + for (NamedAttribute attr : attrDict) { + if (attr.getName() == LLVM::LLVMDialect::getByValAttrName()) + continue; + // TODO: Handle all argument attributes; + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": unhandled argument attribute \"" + << attr.getName() << "\"\n"); + return false; + } + } + } + // TODO: Handle result attributes; + if (funcOp.getResAttrs()) { + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": unhandled result attribute\n"); + return false; + } + // TODO: Handle exceptions. + if (funcOp.getPersonality()) { + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": unhandled function personality\n"); + return false; + } + if (funcOp.getPassthrough()) { + // TODO: Used attributes should not be passthrough. + DenseSet disallowed( + {StringAttr::get(funcOp->getContext(), "noduplicate"), + StringAttr::get(funcOp->getContext(), "noinline"), + StringAttr::get(funcOp->getContext(), "optnone"), + StringAttr::get(funcOp->getContext(), "presplitcoroutine"), + StringAttr::get(funcOp->getContext(), "returns_twice"), + StringAttr::get(funcOp->getContext(), "strictfp")}); + if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) { + auto stringAttr = dyn_cast(attr); + if (!stringAttr) + return false; + if (disallowed.contains(stringAttr)) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline " << funcOp.getSymName() + << ": found disallowed function attribute " + << stringAttr << "\n"); + return true; + } + return false; + })) + return false; + } + return true; + } + + bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final { + return true; + } + + /// Conservative allowlist of operations supported so far. + bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final { + if (isPure(op)) + return true; + // Some attributes on memory operations require handling during + // inlining. Since this is not yet implemented, refuse to inline memory + // operations that have any of these attributes. 
+ if (auto iface = dyn_cast(op)) { + if (iface.getAliasScopesOrNull() || iface.getNoAliasScopesOrNull()) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: unhandled alias analysis metadata\n"); + return false; + } + } + if (auto iface = dyn_cast(op)) { + if (iface.getAccessGroupsOrNull()) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: unhandled access group metadata\n"); + return false; + } + } + if (!isa(op)) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: unhandled side effecting operation \"" + << op->getName() << "\"\n"); + return false; + } + return true; + } + + /// Handle the given inlined return by replacing it with a branch. This + /// overload is called when the inlined region has more than one block. + void handleTerminator(Operation *op, Block *newDest) const final { + // Only return needs to be handled here. + auto returnOp = dyn_cast(op); + if (!returnOp) + return; + + // Replace the return with a branch to the dest. + OpBuilder builder(op); + builder.create(op->getLoc(), returnOp.getOperands(), newDest); + op->erase(); + } + + /// Handle the given inlined return by replacing the uses of the call with the + /// operands of the return. This overload is called when the inlined region + /// only contains one block. + void handleTerminator(Operation *op, + ArrayRef valuesToRepl) const final { + // Return will be the only terminator present. + auto returnOp = cast(op); + + // Replace the values directly with the return operands. + assert(returnOp.getNumOperands() == valuesToRepl.size()); + for (const auto &[dst, src] : + llvm::zip(valuesToRepl, returnOp.getOperands())) + dst.replaceAllUsesWith(src); + } + + Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable, + Value argument, Type targetType, + DictionaryAttr argumentAttrs) const final { + if (auto attr = + argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName())) + return handleByValArgument(builder, callable, argument, *attr); + return argument; + } + + void processInlinedCallBlocks( + Operation *call, + iterator_range inlinedBlocks) const override { + // Alloca operations with a constant size that were in the entry block of + // the callee should be moved to the entry block of the caller, as this will + // fold into prologue/epilogue code during code generation. + // This is not implemented as a standalone pattern because we need to know + // which newly inlined block was previously the entry block of the callee. + moveConstantAllocasToEntryBlock(inlinedBlocks); + } +}; + +} // end anonymous namespace + +void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) { + dialect->addInterfaces(); +} diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h new file mode 100644 index 0000000000000..c6f75d5657c3b --- /dev/null +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h @@ -0,0 +1,33 @@ +//===- LLVMInlining.h - Registration of LLVMInlinerInterface ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Allows registering the LLVM DialectInlinerInterface with the LLVM dialect +// during initialization. 
+// +//===----------------------------------------------------------------------===// + +#ifndef DIALECT_LLVMIR_IR_LLVMINLINING_H +#define DIALECT_LLVMIR_IR_LLVMINLINING_H + +namespace mlir { +namespace LLVM { + +class LLVMDialect; + +namespace detail { + +/// Register the `LLVMInlinerInterface` implementation of +/// `DialectInlinerInterface` with the LLVM dialect. +void addLLVMInlinerInterface(LLVMDialect *dialect); + +} // namespace detail + +} // namespace LLVM +} // namespace mlir + +#endif // DIALECT_LLVMIR_IR_LLVMINLINING_H diff --git a/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt index eb97c6e168e5c..b7d9812ada0b1 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt @@ -20,5 +20,5 @@ add_mlir_dialect_library(MLIRLinalgTransformOps MLIRSideEffectInterfaces MLIRTransformDialect MLIRTransformDialectUtils - MLIRVectorDialect + MLIRVectorTransforms ) diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index d98eb3b781fc5..e3c1429ade54a 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -26,6 +26,7 @@ #include "mlir/Dialect/Transform/IR/TransformTypes.h" #include "mlir/Dialect/Transform/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp index 14bff411ef8c1..58a23e2be54d1 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp @@ -41,6 +41,49 @@ static Value createMul(Location loc, Value x, Value y, OpBuilder &builder) { return builder.create(loc, x, y); } +// Unrolls the given composite `index` into a set of subindices with maximum +// iteration ranges specified by `factors` according to the following +// assumptions: +// 1. The iteration range for `index` is [0, f1 * f2 * ... * fn] i.e. the +// product of the given list of factors +// 2. The iterators corresponding to the entries in `factors` are ordered from +// slowest to fastest varying +// Each subindex is then computed as: +// subindex[i] = floor( (index % (fi * ... * fn)) / (fi-1 * ... * fn) ) +static SmallVector unrollIndex(OpBuilder &b, Location loc, + Value index, + ArrayRef factors) { + assert(factors.size() >= 1 && "empty factor list"); + SmallVector indices(factors.size()); + int64_t runningProd = 1; + for (int i = factors.size() - 1, end = 0; i >= end; i--) { + Value unrolledIndex = index; + if (i > 0) { + Value modBase = b.create( + loc, b.getIndexAttr(runningProd * factors[i])); + unrolledIndex = b.create(loc, unrolledIndex, modBase); + } + if (runningProd > 1) { + Value divDenom = + b.create(loc, b.getIndexAttr(runningProd)); + unrolledIndex = b.create(loc, unrolledIndex, divDenom); + } + runningProd *= factors[i]; + indices[i] = unrolledIndex; + } + return indices; +} + +// Given indices corresponding to iterators in the output (oIndex) and filter +// (fIndex) for a convolution, compute the convolved index for the +// input as `oIndex * stride + fIndex`. 
+static Value getConvolvedIndex(OpBuilder &b, Location loc, Value oIndex, + Value fIndex, int64_t stride) { + Value strideVal = b.create(loc, b.getIndexAttr(stride)); + Value convIndex = b.create(loc, oIndex, strideVal); + return b.create(loc, convIndex, fIndex); +} + FailureOr> rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp) { auto inputType = convOp.getInputs()[0].getType().cast(); @@ -68,32 +111,34 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp) { ArrayRef filterShape = filterType.getShape(); ArrayRef outputShape = outputType.getShape(); - int n = outputShape[0]; - int oh = outputShape[1]; - int ow = outputShape[2]; - int oc = outputShape[3]; - int fh = filterShape[0]; - int fw = filterShape[1]; - int ic = filterShape[2]; + int64_t n = outputShape[0]; + int64_t oh = outputShape[1]; + int64_t ow = outputShape[2]; + int64_t oc = outputShape[3]; + int64_t fh = filterShape[0]; + int64_t fw = filterShape[1]; + int64_t ic = filterShape[2]; Location loc = convOp.getLoc(); - SmallVector colTensorShape = {n, oh, ow, fh, fw, ic}; + // Reshape output and filter to the LHS and result of a (B)MNK matmul. + SmallVector filterReassocIndices = {{0, 1, 2}, {3}}; + auto reshapedFilterType = + RankedTensorType::get({fh * fw * ic, oc}, inputType.getElementType()); + Value reshapedFilter = rewriter.create( + loc, reshapedFilterType, filter, filterReassocIndices); + + SmallVector outputReassocIndices = {{0}, {1, 2}, {3}}; + RankedTensorType reshapedOutputType = + RankedTensorType::get({n, oh * ow, oc}, outputType.getElementType()); + Value reshapedOutput = rewriter.create( + loc, reshapedOutputType, output, outputReassocIndices); + SmallVector colTensorShape = {n, oh * ow, fh * fw * ic}; Value colTensor = rewriter.create( loc, colTensorShape, inputType.getElementType()); - AffineExpr nDim, ohDim, owDim, khDim, kwDim, icDim; - bindDims(context, nDim, ohDim, owDim, khDim, kwDim, icDim); - - AffineExpr shSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[0]); - AffineExpr swSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[1]); - - SmallVector inputExprs = {nDim, ohDim * shSym + khDim, - owDim * swSym + kwDim, icDim}; - + // Convert the input to a (BMK) column tensor. auto nloops = colTensorShape.size(); auto parallel = utils::IteratorType::parallel; @@ -101,85 +146,68 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp) { SmallVector img2colIterators(nloops, parallel); SmallVector img2colIndexingMaps = { - AffineMap::get(nloops, 0, inputExprs, context), AffineMap::getMultiDimIdentityMap(nloops, context)}; auto img2ColTensor = rewriter.create( loc, colTensor.getType(), - /*inputs=*/input, /*outputs=*/colTensor, img2colIndexingMaps, + /*inputs=*/ValueRange{}, /*outputs=*/colTensor, img2colIndexingMaps, img2colIterators, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - nestedBuilder.create(nestedLoc, args[0]); + // Get the iterators named based on the matmul (batch, m, k). + Value bIndex = nestedBuilder.create(loc, 0); + Value mIndex = nestedBuilder.create(loc, 1); + Value kIndex = nestedBuilder.create(loc, 2); + + // Recover the original iteration indices from the problem/input sizes. 
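The index recovery these two helpers perform inside the generic body can be checked with a small standalone sketch of the same arithmetic (the sizes and stride used below are made-up illustration values): unrollIndex peels factors off the flattened index from fastest to slowest varying, and getConvolvedIndex maps an output index and a filter index to the input coordinate oIndex * stride + fIndex.

#include <cstdint>
#include <cstdio>
#include <vector>

// Delinearize `index` into one subindex per factor, slowest varying first:
// subindex[i] = (index % (f_i * ... * f_n)) / (f_{i+1} * ... * f_n).
static std::vector<int64_t> unrollIndex(int64_t index,
                                        const std::vector<int64_t> &factors) {
  std::vector<int64_t> indices(factors.size());
  int64_t runningProd = 1;
  for (int i = static_cast<int>(factors.size()) - 1; i >= 0; --i) {
    int64_t sub = index;
    if (i > 0)
      sub %= runningProd * factors[i];
    if (runningProd > 1)
      sub /= runningProd;
    runningProd *= factors[i];
    indices[i] = sub;
  }
  return indices;
}

// Input coordinate reached by output index `o` and filter index `f`.
static int64_t getConvolvedIndex(int64_t o, int64_t f, int64_t stride) {
  return o * stride + f;
}

int main() {
  // Hypothetical sizes: oh=4, ow=5 for the M dimension; fh=3, fw=3, ic=2 for K.
  std::vector<int64_t> m = unrollIndex(13, {4, 5});      // -> {2, 3}
  std::vector<int64_t> k = unrollIndex(11, {3, 3, 2});   // -> {1, 2, 1}
  std::printf("oh=%lld ow=%lld\n", (long long)m[0], (long long)m[1]);
  std::printf("fh=%lld fw=%lld ic=%lld\n",
              (long long)k[0], (long long)k[1], (long long)k[2]);
  std::printf("h=%lld\n", (long long)getConvolvedIndex(m[0], k[0], /*stride=*/2));
}

The recovered (oh, ow) and (fh, fw, ic) indices then drive a tensor.extract from the input, and the resulting im2col tensor feeds a linalg.generic performing the (B) x M x K by K x N multiplication, since only the left-hand side carries the batch dimension.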
+ SmallVector mIndices = unrollIndex( + nestedBuilder, nestedLoc, mIndex, ArrayRef{oh, ow}); + auto ohIndex = mIndices[0]; + auto owIndex = mIndices[1]; + + SmallVector kIndices = unrollIndex( + nestedBuilder, nestedLoc, kIndex, ArrayRef{fh, fw, ic}); + auto fhIndex = kIndices[0]; + auto fwIndex = kIndices[1]; + auto icIndex = kIndices[2]; + + // Extract the input element corresponding to the expanded indices. + Value hIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, ohIndex, fhIndex, + convOp.getStrides().getValues()[0]); + Value wIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, owIndex, fwIndex, + convOp.getStrides().getValues()[1]); + + // im2col[n, oh*ow, fh*fw*ic] = input[n, sh*oh + fh, sw*ow + fw, ic] + SmallVector extractionIndices{bIndex, hIndex, wIndex, icIndex}; + Value inputVal = nestedBuilder.create( + loc, input, extractionIndices); + nestedBuilder.create(nestedLoc, inputVal); }); - SmallVector img2ColTensorReassocIndices; - SmallVector outputReassocIndices; - RankedTensorType reshapedImg2ColTensorType, reshapedOutputType; - if (n == 1) { - img2ColTensorReassocIndices = {{0, 1, 2}, {3, 4, 5}}; - outputReassocIndices = {{0, 1, 2}, {3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {oh * ow, fh * fw * ic}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({oh * ow, oc}, outputType.getElementType()); - } else { - img2ColTensorReassocIndices = {{0}, {1, 2}, {3, 4, 5}}; - outputReassocIndices = {{0}, {1, 2}, {3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {n, oh * ow, fh * fw * ic}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({n, oh * ow, oc}, outputType.getElementType()); - } - - SmallVector filterReassocIndices = {{0, 1, 2}, {3}}; - auto reshapedFilterType = - RankedTensorType::get({fh * fw * ic, oc}, inputType.getElementType()); - - Value reshapedImg2ColTensor = rewriter.create( - loc, reshapedImg2ColTensorType, img2ColTensor.getResult(0), - img2ColTensorReassocIndices); - - Value reshapedFilter = rewriter.create( - loc, reshapedFilterType, filter, filterReassocIndices); - - Value reshapedOutput = rewriter.create( - loc, reshapedOutputType, output, outputReassocIndices); - - Value result; - if (n == 1) { - auto matmulOp = rewriter.create( - loc, reshapedOutputType, - ArrayRef{reshapedImg2ColTensor, reshapedFilter}, - ArrayRef{reshapedOutput}); - result = matmulOp.getResults().front(); - } else { - // For cases where batch is not 1, we need to keep the batch dimension - // separate. Because the filter does not share the same batch dimension, - // the batch dimension is only used in indexing the input and output. Thus - // we cannot use existing linalg named ops like linalg.batch_matmul. - // i.e. 
(B x) M x K * K x N = (B x) M x N - AffineExpr bDim, mDim, nDim, kDim; - bindDims(context, bDim, mDim, nDim, kDim); - auto lhsMap = AffineMap::get(4, 0, {bDim, mDim, kDim}, context); - auto rhsMap = AffineMap::get(4, 0, {kDim, nDim}, context); - auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); - SmallVector genericIterators = {parallel, parallel, - parallel, reduction}; - - auto genericOp = rewriter.create( - loc, reshapedOutputType, - /*inputs=*/ValueRange{reshapedImg2ColTensor, reshapedFilter}, - /*outputs=*/ValueRange{reshapedOutput}, - ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, - [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - Value mul = createMul(loc, args[0], args[1], nestedBuilder); - Value add = createAdd(loc, mul, args[2], nestedBuilder); - nestedBuilder.create(nestedLoc, add); - }); - result = genericOp.getResults().front(); - } + // Because the filter does not share the same batch dimension, + // the batch dimension is only used in indexing the input and output. Thus + // we cannot use existing linalg named ops like linalg.batch_matmul. + // i.e. (B x) M x K * K x N = (B x) M x N + AffineExpr bDim, mDim, nDim, kDim; + bindDims(context, bDim, mDim, nDim, kDim); + auto lhsMap = AffineMap::get(4, 0, {bDim, mDim, kDim}, context); + auto rhsMap = AffineMap::get(4, 0, {kDim, nDim}, context); + auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); + SmallVector genericIterators = {parallel, parallel, + parallel, reduction}; + + auto genericOp = rewriter.create( + loc, reshapedOutputType, + /*inputs=*/ValueRange{img2ColTensor.getResult(0), reshapedFilter}, + /*outputs=*/ValueRange{reshapedOutput}, + ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + Value mul = createMul(loc, args[0], args[1], nestedBuilder); + Value add = createAdd(loc, mul, args[2], nestedBuilder); + nestedBuilder.create(nestedLoc, add); + }); + Value result = genericOp.getResults().front(); auto reshapedResult = rewriter.create( loc, outputType, result, outputReassocIndices); @@ -367,33 +395,33 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNchwFchwOp convOp) { auto filterShape = filterType.getShape(); auto outputShape = outputType.getShape(); - int n = outputShape[0]; - int oc = outputShape[1]; - int oh = outputShape[2]; - int ow = outputShape[3]; - int ic = filterShape[1]; - int fh = filterShape[2]; - int fw = filterShape[3]; + int64_t n = outputShape[0]; + int64_t oc = outputShape[1]; + int64_t oh = outputShape[2]; + int64_t ow = outputShape[3]; + int64_t ic = filterShape[1]; + int64_t fh = filterShape[2]; + int64_t fw = filterShape[3]; auto loc = convOp.getLoc(); - - SmallVector colTensorShape = {n, ic, fh, fw, oh, ow}; - - Value colTensor = rewriter.create( - loc, colTensorShape, inputType.getElementType()); - MLIRContext *context = rewriter.getContext(); - AffineExpr nDim, icDim, khDim, kwDim, ohDim, owDim; - bindDims(context, nDim, icDim, khDim, kwDim, ohDim, owDim); + SmallVector filterReassocIndices = {{0}, {1, 2, 3}}; + auto reshapedFilterType = + RankedTensorType::get({oc, ic * fh * fw}, inputType.getElementType()); + Value reshapedFilter = rewriter.create( + loc, reshapedFilterType, filter, filterReassocIndices); - auto shSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[0]); - auto swSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[1]); + SmallVector outputReassocIndices = {{0}, {1}, {2, 3}}; + auto 
reshapedOutputType = + RankedTensorType::get({n, oc, oh * ow}, outputType.getElementType()); + Value reshapedOutput = rewriter.create( + loc, reshapedOutputType, output, outputReassocIndices); - SmallVector inputExprs = {nDim, icDim, ohDim * shSym + khDim, - owDim * swSym + kwDim}; + // Convert the input to a (BKN) tensor. + SmallVector colTensorShape = {n, ic * fh * fw, oh * ow}; + Value colTensor = rewriter.create( + loc, colTensorShape, inputType.getElementType()); auto nloops = colTensorShape.size(); @@ -402,83 +430,67 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNchwFchwOp convOp) { SmallVector img2colIterators(nloops, parallel); SmallVector img2colIndexingMaps = { - AffineMap::get(nloops, 0, inputExprs, context), AffineMap::getMultiDimIdentityMap(nloops, context)}; auto img2ColTensor = rewriter.create( loc, colTensor.getType(), - /*inputs=*/input, /*outputs=*/colTensor, img2colIndexingMaps, + /*inputs=*/ValueRange{}, /*outputs=*/colTensor, img2colIndexingMaps, img2colIterators, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - nestedBuilder.create(nestedLoc, args[0]); + // Get the iterators named based on the matmul (batch, m, k). + Value bIndex = nestedBuilder.create(loc, 0); + Value kIndex = nestedBuilder.create(loc, 1); + Value nIndex = nestedBuilder.create(loc, 2); + + // Recover the original iteration indices from the problem/input sizes. + SmallVector kIndices = unrollIndex( + nestedBuilder, nestedLoc, kIndex, ArrayRef{ic, fh, fw}); + auto icIndex = kIndices[0]; + auto fhIndex = kIndices[1]; + auto fwIndex = kIndices[2]; + + SmallVector nIndices = unrollIndex( + nestedBuilder, nestedLoc, nIndex, ArrayRef{oh, ow}); + auto ohIndex = nIndices[0]; + auto owIndex = nIndices[1]; + + // Extract the input element corresponding to the expanded indices. 
+ Value hIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, ohIndex, fhIndex, + convOp.getStrides().getValues()[0]); + Value wIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, owIndex, fwIndex, + convOp.getStrides().getValues()[1]); + + // im2col[n, ic*fh*fw, oh*ow] = input[n, ic, sh*oh + fh, sw*ow + fw] + SmallVector extractionIndices{bIndex, icIndex, hIndex, wIndex}; + Value inputVal = nestedBuilder.create( + loc, input, extractionIndices); + nestedBuilder.create(nestedLoc, inputVal); }); - SmallVector filterReassocIndices = {{0}, {1, 2, 3}}; - auto reshapedFilterType = - RankedTensorType::get({oc, fh * fw * ic}, inputType.getElementType()); - Value reshapedFilter = rewriter.create( - loc, reshapedFilterType, filter, filterReassocIndices); - - SmallVector img2ColTensorReassocIndices; - SmallVector outputReassocIndices; - RankedTensorType reshapedImg2ColTensorType, reshapedOutputType; - if (n == 1) { - img2ColTensorReassocIndices = {{0, 1, 2, 3}, {4, 5}}; - outputReassocIndices = {{0, 1}, {2, 3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {fh * fw * ic, oh * ow}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({oc, oh * ow}, outputType.getElementType()); - } else { - img2ColTensorReassocIndices = {{0}, {1, 2, 3}, {4, 5}}; - outputReassocIndices = {{0}, {1}, {2, 3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {n, fh * fw * ic, oh * ow}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({n, oc, oh * ow}, outputType.getElementType()); - } - - Value reshapedImg2ColTensor = rewriter.create( - loc, reshapedImg2ColTensorType, img2ColTensor.getResult(0), - img2ColTensorReassocIndices); - - Value reshapedOutput = rewriter.create( - loc, reshapedOutputType, output, outputReassocIndices); - - Value result; - if (n == 1) { - auto matmulOp = rewriter.create( - loc, reshapedOutputType, - ArrayRef{reshapedFilter, reshapedImg2ColTensor}, - ArrayRef{reshapedOutput}); - result = matmulOp.getResults().front(); - } else { - // For cases where batch is not 1, we need to keep the batch dimension - // separate. Because the filter does not share the same batch dimension, - // the batch dimension is only used in indexing the input and output. Thus - // we cannot use existing linalg named ops like linalg.batch_matmul. - // i.e. M x K * (B x) K x N = (B x) M x N - AffineExpr bDim, mDim, nDim, kDim; - bindDims(context, bDim, mDim, nDim, kDim); - auto lhsMap = AffineMap::get(4, 0, {mDim, kDim}, context); - auto rhsMap = AffineMap::get(4, 0, {bDim, kDim, nDim}, context); - auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); - SmallVector genericIterators = {parallel, parallel, - parallel, reduction}; - auto genericOp = rewriter.create( - loc, reshapedOutputType, - /*inputs=*/ValueRange{reshapedFilter, reshapedImg2ColTensor}, - /*outputs=*/ValueRange{reshapedOutput}, - ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, - [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - Value mul = createMul(loc, args[0], args[1], nestedBuilder); - Value add = createAdd(loc, mul, args[2], nestedBuilder); - nestedBuilder.create(nestedLoc, add); - }); - result = genericOp.getResults().front(); - } + // Because the filter does not share the same batch dimension, + // the batch dimension is only used in indexing the input and output. Thus + // we cannot use existing linalg named ops like linalg.batch_matmul. + // i.e. 
M x K * (B x) K x N = (B x) M x N + AffineExpr bDim, mDim, nDim, kDim; + bindDims(context, bDim, mDim, nDim, kDim); + auto lhsMap = AffineMap::get(4, 0, {mDim, kDim}, context); + auto rhsMap = AffineMap::get(4, 0, {bDim, kDim, nDim}, context); + auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); + SmallVector genericIterators = {parallel, parallel, + parallel, reduction}; + auto genericOp = rewriter.create( + loc, reshapedOutputType, + /*inputs=*/ValueRange{reshapedFilter, img2ColTensor.getResult(0)}, + /*outputs=*/ValueRange{reshapedOutput}, + ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + Value mul = createMul(loc, args[0], args[1], nestedBuilder); + Value add = createAdd(loc, mul, args[2], nestedBuilder); + nestedBuilder.create(nestedLoc, add); + }); + Value result = genericOp.getResults().front(); auto reshapedResult = rewriter.create( loc, outputType, result, outputReassocIndices); diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index f3879f5dd9d12..75f818b1b275d 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -270,7 +270,7 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap, if (auto applyOp = dyn_cast(op)) { AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(), applyOp.getOperands()); - if (failed(constraints.addBound(IntegerPolyhedron::EQ, + if (failed(constraints.addBound(BoundType::EQ, getPosition(applyOp.getResult()), map))) return; continue; @@ -279,7 +279,7 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap, auto minOp = cast(op); AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(), minOp.getOperands()); - if (failed(constraints.addBound(IntegerPolyhedron::UB, + if (failed(constraints.addBound(BoundType::UB, getPosition(minOp.getResult()), map, /*isClosedBound=*/true))) return; @@ -290,8 +290,7 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap, // of the terminals of the index computation. 
unsigned pos = getPosition(value); if (constantRequired) { - auto ubConst = constraints.getConstantBound64( - FlatAffineValueConstraints::BoundType::UB, pos); + auto ubConst = constraints.getConstantBound64(BoundType::UB, pos); if (!ubConst) return; diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp index c1c3478b06efc..c850348c85480 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" @@ -19,7 +20,9 @@ #include "mlir/Dialect/MemRef/Transforms/Passes.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/AffineMap.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/TypeSwitch.h" @@ -150,70 +153,6 @@ resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter, return success(); } -/// Given the 'indices' of an load/store operation where the memref is a result -/// of a subview op, returns the indices w.r.t to the source memref of the -/// subview op. For example -/// -/// %0 = ... : memref<12x42xf32> -/// %1 = subview %0[%arg0, %arg1][][%stride1, %stride2] : memref<12x42xf32> to -/// memref<4x4xf32, offset=?, strides=[?, ?]> -/// %2 = load %1[%i1, %i2] : memref<4x4xf32, offset=?, strides=[?, ?]> -/// -/// could be folded into -/// -/// %2 = load %0[%arg0 + %i1 * %stride1][%arg1 + %i2 * %stride2] : -/// memref<12x42xf32> -static LogicalResult -resolveSourceIndicesSubView(Location loc, PatternRewriter &rewriter, - memref::SubViewOp subViewOp, ValueRange indices, - SmallVectorImpl &sourceIndices) { - SmallVector mixedOffsets = subViewOp.getMixedOffsets(); - SmallVector mixedSizes = subViewOp.getMixedSizes(); - SmallVector mixedStrides = subViewOp.getMixedStrides(); - - SmallVector useIndices; - // Check if this is rank-reducing case. Then for every unit-dim size add a - // zero to the indices. - int64_t resultDim = 0; - llvm::SmallBitVector unusedDims = subViewOp.getDroppedDims(); - for (auto dim : llvm::seq(0, subViewOp.getSourceType().getRank())) { - if (unusedDims.test(dim)) - useIndices.push_back(rewriter.create(loc, 0)); - else - useIndices.push_back(indices[resultDim++]); - } - if (useIndices.size() != mixedOffsets.size()) - return failure(); - sourceIndices.resize(useIndices.size()); - for (auto index : llvm::seq(0, mixedOffsets.size())) { - SmallVector dynamicOperands; - AffineExpr expr = rewriter.getAffineDimExpr(0); - int64_t numSymbols = 0; - dynamicOperands.push_back(useIndices[index]); - - // Multiply the stride; - if (auto attr = mixedStrides[index].dyn_cast()) { - expr = expr * attr.cast().getInt(); - } else { - dynamicOperands.push_back(mixedStrides[index].get()); - expr = expr * rewriter.getAffineSymbolExpr(numSymbols++); - } - - // Add the offset. 
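The subview index resolution described above (a load from %1[%i1, %i2] folding into a load from %0 at %arg0 + %i1 * %stride1 and %arg1 + %i2 * %stride2) boils down to one affine expression per dimension. A standalone sketch of that arithmetic with made-up offsets, strides, and indices:

#include <cstdint>
#include <cstdio>
#include <vector>

// For a load from a subview, each source index is offset + index * stride,
// computed per dimension of the source memref.
static std::vector<int64_t>
resolveSourceIndices(const std::vector<int64_t> &offsets,
                     const std::vector<int64_t> &strides,
                     const std::vector<int64_t> &indices) {
  std::vector<int64_t> source(indices.size());
  for (size_t dim = 0; dim < indices.size(); ++dim)
    source[dim] = offsets[dim] + indices[dim] * strides[dim];
  return source;
}

int main() {
  // Subview of %0 with offsets [2, 3] and strides [5, 6]; a load at [1, 2]
  // on the subview reads %0[2 + 1*5, 3 + 2*6].
  std::vector<int64_t> src = resolveSourceIndices({2, 3}, {5, 6}, {1, 2});
  std::printf("source indices: [%lld, %lld]\n",
              (long long)src[0], (long long)src[1]);   // [7, 15]
}

Rank-reducing subviews additionally substitute a constant zero index for every dropped source dimension before this computation, which is what the dropped-dims handling threads through.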
- if (auto attr = mixedOffsets[index].dyn_cast()) { - expr = expr + attr.cast().getInt(); - } else { - dynamicOperands.push_back(mixedOffsets[index].get()); - expr = expr + rewriter.getAffineSymbolExpr(numSymbols++); - } - Location loc = subViewOp.getLoc(); - OpFoldResult ofr = makeComposedFoldedAffineApply( - rewriter, loc, AffineMap::get(1, numSymbols, expr), dynamicOperands); - sourceIndices[index] = getValueOrCreateConstantIndexOp(rewriter, loc, ofr); - } - return success(); -} - /// Helpers to access the memref operand for each op. template static Value getMemRefOperand(LoadOrStoreOpTy op) { @@ -236,25 +175,6 @@ static Value getMemRefOperand(gpu::SubgroupMmaStoreMatrixOp op) { return op.getDstMemref(); } -/// Given the permutation map of the original -/// `vector.transfer_read`/`vector.transfer_write` operations compute the -/// permutation map to use after the subview is folded with it. -static AffineMapAttr getPermutationMapAttr(MLIRContext *context, - memref::SubViewOp subViewOp, - AffineMap currPermutationMap) { - llvm::SmallBitVector unusedDims = subViewOp.getDroppedDims(); - SmallVector exprs; - int64_t sourceRank = subViewOp.getSourceType().getRank(); - for (auto dim : llvm::seq(0, sourceRank)) { - if (unusedDims.test(dim)) - continue; - exprs.push_back(getAffineDimExpr(dim, context)); - } - auto resultDimToSourceDimMap = AffineMap::get(sourceRank, 0, exprs, context); - return AffineMapAttr::get( - currPermutationMap.compose(resultDimToSourceDimMap)); -} - //===----------------------------------------------------------------------===// // Patterns //===----------------------------------------------------------------------===// @@ -390,6 +310,42 @@ calculateExpandedAccessIndices(AffineMap affineMap, return expandedIndices; } +template +static LogicalResult +preconditionsFoldSubViewOpImpl(RewriterBase &rewriter, XferOp xferOp, + memref::SubViewOp subviewOp) { + static_assert( + !llvm::is_one_of::value, + "must be a vector transfer op"); + if (xferOp.hasOutOfBoundsDim()) + return rewriter.notifyMatchFailure(xferOp, "out of bounds transfer dim"); + if (xferOp.getMask()) + return rewriter.notifyMatchFailure(xferOp, "masked transfer"); + if (!subviewOp.hasUnitStride()) { + return rewriter.notifyMatchFailure( + xferOp, "non-1 stride subview, need to track strides in folded memref"); + } + return success(); +} + +static LogicalResult preconditionsFoldSubViewOp(RewriterBase &rewriter, + Operation *op, + memref::SubViewOp subviewOp) { + return success(); +} + +static LogicalResult preconditionsFoldSubViewOp(RewriterBase &rewriter, + vector::TransferReadOp readOp, + memref::SubViewOp subviewOp) { + return preconditionsFoldSubViewOpImpl(rewriter, readOp, subviewOp); +} + +static LogicalResult preconditionsFoldSubViewOp(RewriterBase &rewriter, + vector::TransferWriteOp writeOp, + memref::SubViewOp subviewOp) { + return preconditionsFoldSubViewOpImpl(rewriter, writeOp, subviewOp); +} + template LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( OpTy loadOp, PatternRewriter &rewriter) const { @@ -397,7 +353,12 @@ LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( getMemRefOperand(loadOp).template getDefiningOp(); if (!subViewOp) - return failure(); + return rewriter.notifyMatchFailure(loadOp, "not a subview producer"); + + LogicalResult preconditionResult = + preconditionsFoldSubViewOp(rewriter, loadOp, subViewOp); + if (failed(preconditionResult)) + return preconditionResult; SmallVector indices(loadOp.getIndices().begin(), loadOp.getIndices().end()); @@ -410,9 +371,10 @@ 
LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( indices.assign(expandedIndices.begin(), expandedIndices.end()); } SmallVector sourceIndices; - if (failed(resolveSourceIndicesSubView(loadOp.getLoc(), rewriter, subViewOp, - indices, sourceIndices))) - return failure(); + resolveSourceIndicesOffsetsAndStrides( + rewriter, loadOp.getLoc(), subViewOp.getMixedOffsets(), + subViewOp.getMixedStrides(), subViewOp.getDroppedDims(), indices, + sourceIndices); llvm::TypeSwitch(loadOp) .Case([&](AffineLoadOp op) { @@ -423,14 +385,13 @@ LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( rewriter.replaceOpWithNewOp( loadOp, subViewOp.getSource(), sourceIndices, op.getNontemporal()); }) - .Case([&](vector::TransferReadOp transferReadOp) { + .Case([&](vector::TransferReadOp op) { rewriter.replaceOpWithNewOp( - transferReadOp, transferReadOp.getVectorType(), - subViewOp.getSource(), sourceIndices, - getPermutationMapAttr(rewriter.getContext(), subViewOp, - transferReadOp.getPermutationMap()), - transferReadOp.getPadding(), - /*mask=*/Value(), transferReadOp.getInBoundsAttr()); + op, op.getVectorType(), subViewOp.getSource(), sourceIndices, + AffineMapAttr::get(expandDimsToRank( + op.getPermutationMap(), subViewOp.getSourceType().getRank(), + subViewOp.getDroppedDims())), + op.getPadding(), /*mask=*/Value(), op.getInBoundsAttr()); }) .Case([&](gpu::SubgroupMmaLoadMatrixOp op) { rewriter.replaceOpWithNewOp( @@ -512,7 +473,12 @@ LogicalResult StoreOpOfSubViewOpFolder::matchAndRewrite( getMemRefOperand(storeOp).template getDefiningOp(); if (!subViewOp) - return failure(); + return rewriter.notifyMatchFailure(storeOp, "not a subview producer"); + + LogicalResult preconditionResult = + preconditionsFoldSubViewOp(rewriter, storeOp, subViewOp); + if (failed(preconditionResult)) + return preconditionResult; SmallVector indices(storeOp.getIndices().begin(), storeOp.getIndices().end()); @@ -525,9 +491,10 @@ LogicalResult StoreOpOfSubViewOpFolder::matchAndRewrite( indices.assign(expandedIndices.begin(), expandedIndices.end()); } SmallVector sourceIndices; - if (failed(resolveSourceIndicesSubView(storeOp.getLoc(), rewriter, subViewOp, - indices, sourceIndices))) - return failure(); + resolveSourceIndicesOffsetsAndStrides( + rewriter, storeOp.getLoc(), subViewOp.getMixedOffsets(), + subViewOp.getMixedStrides(), subViewOp.getDroppedDims(), indices, + sourceIndices); llvm::TypeSwitch(storeOp) .Case([&](AffineStoreOp op) { @@ -542,8 +509,9 @@ LogicalResult StoreOpOfSubViewOpFolder::matchAndRewrite( .Case([&](vector::TransferWriteOp op) { rewriter.replaceOpWithNewOp( op, op.getValue(), subViewOp.getSource(), sourceIndices, - getPermutationMapAttr(rewriter.getContext(), subViewOp, - op.getPermutationMap()), + AffineMapAttr::get(expandDimsToRank( + op.getPermutationMap(), subViewOp.getSourceType().getRank(), + subViewOp.getDroppedDims())), op.getInBoundsAttr()); }) .Case([&](gpu::SubgroupMmaStoreMatrixOp op) { diff --git a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp index 6964747cdebb6..1c458eee44d1a 100644 --- a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp @@ -98,9 +98,9 @@ LogicalResult scf::addLoopRangeConstraints(FlatAffineValueConstraints &cstr, std::optional lbInt = getConstantIntValue(lb); std::optional ubInt = getConstantIntValue(ub); if (lbInt) - cstr.addBound(IntegerPolyhedron::EQ, symLb, *lbInt); + cstr.addBound(BoundType::EQ, symLb, *lbInt); if 
(ubInt) - cstr.addBound(IntegerPolyhedron::EQ, symUb, *ubInt); + cstr.addBound(BoundType::EQ, symUb, *ubInt); // Lower bound: iv >= lb (equiv.: iv - lb >= 0) SmallVector ineqLb(cstr.getNumCols(), 0); @@ -131,7 +131,7 @@ LogicalResult scf::addLoopRangeConstraints(FlatAffineValueConstraints &cstr, /*dimCount=*/cstr.getNumDimVars(), /*symbolCount=*/cstr.getNumSymbolVars(), /*result=*/ivUb); - return cstr.addBound(IntegerPolyhedron::UB, dimIv, map); + return cstr.addBound(BoundType::UB, dimIv, map); } /// Canonicalize min/max operations in the context of for loops with a known @@ -202,9 +202,9 @@ LogicalResult scf::rewritePeeledMinMaxOp(RewriterBase &rewriter, Operation *op, constraints.appendDimVar({iv}); constraints.appendSymbolVar({ub, step}); if (auto constUb = getConstantIntValue(ub)) - constraints.addBound(IntegerPolyhedron::EQ, 1, *constUb); + constraints.addBound(BoundType::EQ, 1, *constUb); if (auto constStep = getConstantIntValue(step)) - constraints.addBound(IntegerPolyhedron::EQ, 2, *constStep); + constraints.addBound(BoundType::EQ, 2, *constStep); // Add loop peeling invariant. This is the main piece of knowledge that // enables AffineMinOp simplification. diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 9d26e51e04fd5..93db7da27abdd 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -2396,6 +2396,26 @@ struct InsertSliceOpSourceCastInserter final }; } // namespace +llvm::SmallBitVector InsertSliceOp::getDroppedDims() { + ArrayRef resultShape = getType().getShape(); + SmallVector mixedSizes = getMixedSizes(); + llvm::SmallBitVector droppedDims(mixedSizes.size()); + unsigned shapePos = 0; + for (const auto &size : enumerate(mixedSizes)) { + std::optional sizeVal = getConstantIntValue(size.value()); + // If the size is not 1, or if the current matched dimension of the result + // is the same static shape as the size value (which is 1), then the + // dimension is preserved. 
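The loop in getDroppedDims marks a slice dimension as dropped when its size is the constant 1 and it cannot be matched against a unit dimension of the shape being walked. A standalone sketch of that matching, with -1 standing in for a dynamic size (an illustration convention only):

#include <cstdint>
#include <cstdio>
#include <vector>

// sizes: static slice sizes, with -1 standing in for a dynamic size.
// shape: the shape the unit dimensions are matched against.
// Returns one flag per slice dimension; true means the dimension is dropped.
static std::vector<bool> droppedDims(const std::vector<int64_t> &sizes,
                                     const std::vector<int64_t> &shape) {
  std::vector<bool> dropped(sizes.size(), false);
  size_t shapePos = 0;
  for (size_t i = 0; i < sizes.size(); ++i) {
    // Dynamic or non-unit sizes always map to a kept dimension; a unit size is
    // also kept when the currently matched shape dimension is itself a static 1.
    if (sizes[i] < 0 || sizes[i] != 1 ||
        (shapePos < shape.size() && shape[shapePos] == 1)) {
      ++shapePos;
      continue;
    }
    dropped[i] = true;
  }
  return dropped;
}

int main() {
  // Slice sizes {4, 1, 8} matched against a rank-reduced shape {4, 8}:
  // the middle unit dimension is dropped.
  std::vector<bool> d = droppedDims({4, 1, 8}, {4, 8});
  for (size_t i = 0; i < d.size(); ++i)
    std::printf("dim %zu dropped: %s\n", i, d[i] ? "yes" : "no");
}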
+ if (!sizeVal || *sizeVal != 1 || + (shapePos < resultShape.size() && resultShape[shapePos] == 1)) { + shapePos++; + continue; + } + droppedDims.set(size.index()); + } + return droppedDims; +} + void InsertSliceOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { results.add, diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp index 426b1363c6a0e..d27c4576a8b7a 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp @@ -53,6 +53,6 @@ struct TensorBufferizePass }; } // namespace -std::unique_ptr mlir::createTensorBufferizePass() { +std::unique_ptr mlir::tensor::createTensorBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt index 5ed3d97b2719f..9f6780730dc71 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIRTensorTransforms EmptyOpPatterns.cpp ExtractSliceFromReshapeUtils.cpp FoldIntoPackAndUnpackPatterns.cpp + FoldTensorSubsetOps.cpp MergeConsecutiveInsertExtractSlicePatterns.cpp ReshapePatterns.cpp SplitPaddingPatterns.cpp @@ -29,4 +30,5 @@ add_mlir_dialect_library(MLIRTensorTransforms MLIRTensorDialect MLIRTilingInterface MLIRTransforms + MLIRVectorDialect ) diff --git a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp new file mode 100644 index 0000000000000..80ecb868dff6a --- /dev/null +++ b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp @@ -0,0 +1,173 @@ +//===- FoldTensorSubsetOps.cpp - Fold tensor subset ops -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Fold tensor subset ops with producer / consumers. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Transforms/Passes.h" +#include "mlir/Dialect/Tensor/Transforms/Transforms.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/TypeSwitch.h" + +namespace mlir { +namespace tensor { +#define GEN_PASS_DEF_FOLDTENSORSUBSETOPS +#include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" +} // namespace tensor +} // namespace mlir + +using namespace mlir; + +static Value getTensorOperand(vector::TransferReadOp op) { + return op.getSource(); +} + +static Value getTensorOperand(tensor::InsertSliceOp op) { + return op.getSource(); +} + +//===----------------------------------------------------------------------===// +// Patterns +//===----------------------------------------------------------------------===// + +namespace { +/// Merge extract_slice operation with load/transferRead operation. 
+class TransferReadOfExtractSliceOpFolder final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, + PatternRewriter &rewriter) const override; +}; + +/// Merge insert_slice operation with store/transferWriteOp operation. +class InsertSliceOfTransferWriteOpFolder final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::InsertSliceOp insertSliceOp, + PatternRewriter &rewriter) const override; +}; +} // namespace + +template +static LogicalResult preconditionsFoldExtractOrInsertWithTransferOp( + RewriterBase &rewriter, XferOp xferOp, + ExtractOrInsertOp extractOrInsertSliceOp) { + if (xferOp.hasOutOfBoundsDim()) + return rewriter.notifyMatchFailure(xferOp, "out of bounds transfer dim"); + if (xferOp.getMask()) + return rewriter.notifyMatchFailure(xferOp, "masked transfer"); + if (!extractOrInsertSliceOp.hasUnitStride()) { + return rewriter.notifyMatchFailure( + xferOp, "non-1 stride insert/extract, requires keeping track of " + "strides, this may result in needing to insert " + "vector.insert_strided_slice/extract_strided_slice ops"); + } + return success(); +} + +LogicalResult TransferReadOfExtractSliceOpFolder::matchAndRewrite( + vector::TransferReadOp readOp, PatternRewriter &rewriter) const { + auto extractSliceOp = + getTensorOperand(readOp).getDefiningOp(); + if (!extractSliceOp) + return rewriter.notifyMatchFailure(readOp, "not an extract_slice"); + + LogicalResult preconditionResult = + preconditionsFoldExtractOrInsertWithTransferOp(rewriter, readOp, + extractSliceOp); + if (failed(preconditionResult)) + return preconditionResult; + + SmallVector indices(readOp.getIndices().begin(), + readOp.getIndices().end()); + SmallVector sourceIndices; + resolveSourceIndicesOffsetsAndStrides( + rewriter, readOp.getLoc(), extractSliceOp.getMixedOffsets(), + extractSliceOp.getMixedStrides(), extractSliceOp.getDroppedDims(), + indices, sourceIndices); + + rewriter.replaceOpWithNewOp( + readOp, readOp.getVectorType(), extractSliceOp.getSource(), sourceIndices, + AffineMapAttr::get(expandDimsToRank( + readOp.getPermutationMap(), extractSliceOp.getSourceType().getRank(), + extractSliceOp.getDroppedDims())), + readOp.getPadding(), + /*mask=*/Value(), readOp.getInBoundsAttr()); + + return success(); +} + +LogicalResult InsertSliceOfTransferWriteOpFolder::matchAndRewrite( + tensor::InsertSliceOp insertSliceOp, PatternRewriter &rewriter) const { + auto writeOp = getTensorOperand(insertSliceOp) + .template getDefiningOp(); + if (!writeOp) + return rewriter.notifyMatchFailure(insertSliceOp, "not a transfer_write"); + + LogicalResult preconditionResult = + preconditionsFoldExtractOrInsertWithTransferOp(rewriter, writeOp, + insertSliceOp); + if (failed(preconditionResult)) + return preconditionResult; + + SmallVector indices(writeOp.getIndices().begin(), + writeOp.getIndices().end()); + SmallVector sourceIndices; + resolveSourceIndicesOffsetsAndStrides( + rewriter, writeOp.getLoc(), insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedStrides(), insertSliceOp.getDroppedDims(), indices, + sourceIndices); + + rewriter.replaceOpWithNewOp( + insertSliceOp, writeOp.getValue(), insertSliceOp.getDest(), sourceIndices, + AffineMapAttr::get(expandDimsToRank(writeOp.getPermutationMap(), + insertSliceOp.getDestType().getRank(), + insertSliceOp.getDroppedDims())), + writeOp.getInBoundsAttr()); + + return success(); +} + +void 
tensor::populateFoldTensorSubsetOpPatterns(RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); +} +//===----------------------------------------------------------------------===// +// Pass registration +//===----------------------------------------------------------------------===// + +namespace { + +struct FoldTensorSubsetOpsPass final + : public tensor::impl::FoldTensorSubsetOpsBase { + void runOnOperation() override; +}; + +} // namespace + +void FoldTensorSubsetOpsPass::runOnOperation() { + RewritePatternSet patterns(&getContext()); + tensor::populateFoldTensorSubsetOpPatterns(patterns); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); +} + +std::unique_ptr tensor::createFoldTensorSubsetOpsPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp index 4169882046556..895d1b1f02f07 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp @@ -18,6 +18,7 @@ using namespace mlir::tensor; namespace { /// Merges consecutive tensor.extract_slice ops into one. +// TODO: move to FoldTensorSubsetOps and unify APIs with FoldMemRefAliasOps. struct MergeConsecutiveExtractSlice : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -41,6 +42,7 @@ struct MergeConsecutiveExtractSlice : public OpRewritePattern { }; /// Merges consecutive tensor.insert_slice ops into one. +// TODO: move to FoldTensorSubsetOps and unify APIs with FoldMemRefAliasOps. template struct MergeConsecutiveInsertSlice : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; diff --git a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt index efc78420b9e17..b7848b1a44229 100644 --- a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRTensorUtils LINK_LIBS PUBLIC MLIRAffineDialect MLIRArithDialect + MLIRArithUtils MLIRIR MLIRTensorDialect ) diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index a5847250fa915..4c09c540bde2e 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index 1a8a5782e11f6..16f23e4798c02 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -519,6 +519,65 @@ void ClampOp::getCanonicalizationPatterns(RewritePatternSet &results, results.add(context); } +struct ConcatSliceOptimization : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::SliceOp sliceOp, + PatternRewriter &rewriter) const override { + Value sliceInput = sliceOp.getInput(); + auto concatOp = sliceInput.getDefiningOp(); + if (!concatOp) + return rewriter.notifyMatchFailure( + sliceOp, "slice input must be concat operation"); + + OperandRange inputs = concatOp.getInput1(); + auto concatType = 
dyn_cast(concatOp.getType()); + if (!concatType || !concatType.hasStaticShape()) + return rewriter.notifyMatchFailure( + sliceOp, "slice input must be a static ranked tensor"); + int32_t axis = concatOp.getAxis(); + + llvm::SmallVector sliceStart(sliceOp.getStart()); + llvm::ArrayRef sliceSize = sliceOp.getSize(); + + // Validate slice on the concatenated axis. Slicing along this + // axis should span only one of the inputs to the concatenate + // operation. + std::optional replaceWithSlice; + for (auto input : inputs) { + auto inputType = dyn_cast(input.getType()); + if (!inputType || !inputType.hasStaticShape()) + return rewriter.notifyMatchFailure( + sliceOp, "concat input must be a static ranked tensor"); + + if (sliceStart[axis] >= 0 && + (sliceStart[axis] + sliceSize[axis]) <= inputType.getDimSize(axis)) { + replaceWithSlice = + rewriter + .create( + sliceOp.getLoc(), sliceOp.getType(), input, + rewriter.getDenseI64ArrayAttr(sliceOp.getStart()), + rewriter.getDenseI64ArrayAttr(sliceSize)) + .getResult(); + break; + } + sliceStart[axis] -= inputType.getDimSize(axis); + } + + if (!replaceWithSlice) + return rewriter.notifyMatchFailure( + sliceOp, "corresponding concat input not found for slice"); + + rewriter.replaceOp(sliceOp, replaceWithSlice.value()); + return success(); + } +}; + +void SliceOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results.add(context); +} + //===----------------------------------------------------------------------===// // Operator Folders. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 9796693b4b6cd..ce7d1844ac7f1 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -640,7 +640,7 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) { auto loc = parser.getCurrentLocation(); DictionaryAttr dictAttr; // TODO: Unify linalg op attribute parsing. - if (parser.parseAttribute(dictAttr, "_", result.attributes) || + if (parser.parseAttribute(dictAttr) || parser.parseOperand(lhsInfo) || parser.parseComma() || parser.parseOperand(rhsInfo) || parser.parseComma() || parser.parseOperand(accInfo) || @@ -653,7 +653,7 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) { parser.resolveOperand(accInfo, resultType, result.operands) || parser.addTypeToList(resultType, result.types)) return failure(); - result.attributes.assign(dictAttr.getValue().begin(), + result.attributes.append(dictAttr.getValue().begin(), dictAttr.getValue().end()); // Convert array of string into an array of IteratyType enums. This is needed, @@ -3733,6 +3733,8 @@ namespace { /// %1 = vector.transfer_read %t[%p0, %p1], %cst {in_bounds = [true, true]} /// : tensor, vector<4x5xf32> /// ``` +// TODO: this is brittle and should be deprecated in favor of a more general +// pattern that applies on-demand. struct FoldExtractSliceIntoTransferRead : public OpRewritePattern { public: @@ -3883,9 +3885,13 @@ struct TransferReadAfterWriteToBroadcast void TransferReadOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results - .add( - context); + // clang-format off + results.add < + // TODO: this is brittle and should be deprecated in favor of a + // more general pattern that applies on-demand. 
+ FoldExtractSliceIntoTransferRead, + TransferReadAfterWriteToBroadcast>(context); + // clang-format on } //===----------------------------------------------------------------------===// @@ -4235,6 +4241,8 @@ class FoldWaw final : public OpRewritePattern { /// %1 = vector.transfer_write %v, %t2[%a, %b] {in_bounds = [true, true]} /// : vector<4x5xf32>, tensor /// ``` +// TODO: this is brittle and should be deprecated in favor of a more general +// pattern that applies on-demand. struct FoldInsertSliceIntoTransferWrite : public OpRewritePattern { public: @@ -4417,8 +4425,13 @@ struct SwapExtractSliceOfTransferWrite void TransferWriteOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + // clang-format on } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp index 60996b9add614..136d234742b8d 100644 --- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp +++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp @@ -7,13 +7,14 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Vector/TransformOps/VectorTransformOps.h" - #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" #include "mlir/Dialect/PDL/IR/PDL.h" #include "mlir/Dialect/PDL/IR/PDLTypes.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" #include "mlir/Dialect/Transform/IR/TransformInterfaces.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/Dialect/X86Vector/Transforms.h" #include "mlir/Parser/Parser.h" @@ -82,10 +83,9 @@ DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( // In the future we may want to more finely select particular stages. // Stage 1: contraction lowerings. - patterns.add(vectorTransformOptions, - ctx); + populateVectorContractLoweringPatterns( + patterns, vectorTransformOptions, /*benefit=*/1, + /*disableOuterProductLowering*/ true); vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); // Stage 2: multi-reduction lowerings. @@ -93,8 +93,7 @@ DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( patterns, vectorTransformOptions.vectorMultiReductionLowering); // Stage 3: Rewrite vector.transfer into full and partial parts. - patterns.add( - ctx, vectorTransformOptions); + populateVectorTransferFullPartialPatterns(patterns, vectorTransformOptions); // Stage 4: Lower vector transfers. vector::populateVectorTransferLoweringPatterns(patterns, maxTransferRank); @@ -107,8 +106,8 @@ DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( vector::populateVectorShapeCastLoweringPatterns(patterns); // Stage 7: Lower vector.transpose. 
- vector::populateVectorTransposeLoweringPatterns(patterns, - vectorTransformOptions); + vector::populateVectorTransposeLoweringPatterns( + patterns, vectorTransformOptions, /*benefit=*/1); if (getTransposeAvx2Lowering()) x86vector::avx2::populateSpecializedTransposeLoweringPatterns( patterns, avx2LoweringOptions, /*benefit=*/10); diff --git a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt index 6fb1b8c18a122..f17208e193b3c 100644 --- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt @@ -1,14 +1,20 @@ add_mlir_dialect_library(MLIRVectorTransforms BufferizableOpInterfaceImpl.cpp Bufferize.cpp + LowerVectorBroadcast.cpp + LowerVectorContract.cpp + LowerVectorGather.cpp LowerVectorMask.cpp + LowerVectorMultiReduction.cpp + LowerVectorScan.cpp + LowerVectorShapeCast.cpp + LowerVectorTransfer.cpp + LowerVectorTranspose.cpp VectorDistribute.cpp VectorDropLeadUnitDim.cpp VectorInsertExtractStridedSliceRewritePatterns.cpp - VectorMultiDimReductionTransforms.cpp VectorTransferOpTransforms.cpp VectorTransferSplitRewritePatterns.cpp - VectorTransferPermutationMapRewritePatterns.cpp VectorTransforms.cpp VectorUnroll.cpp diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp new file mode 100644 index 0000000000000..ad538fe4a6828 --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp @@ -0,0 +1,156 @@ +//===- LowerVectorBroadcast.cpp - Lower 'vector.broadcast' operation ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.broadcast' operation. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" +#include "mlir/Dialect/Vector/Utils/VectorUtils.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/VectorInterfaces.h" +#include "mlir/Support/LogicalResult.h" + +#define DEBUG_TYPE "vector-broadcast-lowering" + +using namespace mlir; +using namespace mlir::vector; + +namespace { +/// Progressive lowering of BroadcastOp. 
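+/// In outline: a scalar (or single-element) source becomes a vector.splat; a
+/// lower-rank source is broadcast to rank n-1 and then replicated along the
+/// leading dimension with vector.insert; identical shapes pass through
+/// unchanged; and a same-rank source with a unit dimension is stretched by
+/// extracting, broadcasting and re-inserting slices along that dimension.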
+class BroadcastOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::BroadcastOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + VectorType dstType = op.getResultVectorType(); + VectorType srcType = op.getSourceType().dyn_cast(); + Type eltType = dstType.getElementType(); + + // Scalar to any vector can use splat. + if (!srcType) { + rewriter.replaceOpWithNewOp(op, dstType, op.getSource()); + return success(); + } + + // Determine rank of source and destination. + int64_t srcRank = srcType.getRank(); + int64_t dstRank = dstType.getRank(); + + // Stretching scalar inside vector (e.g. vector<1xf32>) can use splat. + if (srcRank <= 1 && dstRank == 1) { + Value ext; + if (srcRank == 0) + ext = rewriter.create(loc, op.getSource()); + else + ext = rewriter.create(loc, op.getSource(), 0); + rewriter.replaceOpWithNewOp(op, dstType, ext); + return success(); + } + + // Duplicate this rank. + // For example: + // %x = broadcast %y : k-D to n-D, k < n + // becomes: + // %b = broadcast %y : k-D to (n-1)-D + // %x = [%b,%b,%b,%b] : n-D + // becomes: + // %b = [%y,%y] : (n-1)-D + // %x = [%b,%b,%b,%b] : n-D + if (srcRank < dstRank) { + // Duplication. + VectorType resType = + VectorType::get(dstType.getShape().drop_front(), eltType); + Value bcst = + rewriter.create(loc, resType, op.getSource()); + Value result = rewriter.create( + loc, dstType, rewriter.getZeroAttr(dstType)); + for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) + result = rewriter.create(loc, bcst, result, d); + rewriter.replaceOp(op, result); + return success(); + } + + // Find non-matching dimension, if any. + assert(srcRank == dstRank); + int64_t m = -1; + for (int64_t r = 0; r < dstRank; r++) + if (srcType.getDimSize(r) != dstType.getDimSize(r)) { + m = r; + break; + } + + // All trailing dimensions are the same. Simply pass through. + if (m == -1) { + rewriter.replaceOp(op, op.getSource()); + return success(); + } + + // Any non-matching dimension forces a stretch along this rank. + // For example: + // %x = broadcast %y : vector<4x1x2xf32> to vector<4x2x2xf32> + // becomes: + // %a = broadcast %y[0] : vector<1x2xf32> to vector<2x2xf32> + // %b = broadcast %y[1] : vector<1x2xf32> to vector<2x2xf32> + // %c = broadcast %y[2] : vector<1x2xf32> to vector<2x2xf32> + // %d = broadcast %y[3] : vector<1x2xf32> to vector<2x2xf32> + // %x = [%a,%b,%c,%d] + // becomes: + // %u = broadcast %y[0][0] : vector<2xf32> to vector <2x2xf32> + // %v = broadcast %y[1][0] : vector<2xf32> to vector <2x2xf32> + // %a = [%u, %v] + // .. + // %x = [%a,%b,%c,%d] + VectorType resType = + VectorType::get(dstType.getShape().drop_front(), eltType); + Value result = rewriter.create( + loc, dstType, rewriter.getZeroAttr(dstType)); + if (m == 0) { + // Stetch at start. + Value ext = rewriter.create(loc, op.getSource(), 0); + Value bcst = rewriter.create(loc, resType, ext); + for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) + result = rewriter.create(loc, bcst, result, d); + } else { + // Stetch not at start. 
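+      // The mismatched unit dimension is not the leading one: extract each
+      // leading slice, broadcast it (stretching the unit dimension), and
+      // re-insert it into the result.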
+ for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) { + Value ext = rewriter.create(loc, op.getSource(), d); + Value bcst = rewriter.create(loc, resType, ext); + result = rewriter.create(loc, bcst, result, d); + } + } + rewriter.replaceOp(op, result); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorBroadcastLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add(patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp new file mode 100644 index 0000000000000..1280cfef0b645 --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp @@ -0,0 +1,1329 @@ +//===- LowerVectorContract.cpp - Lower 'vector.contract' operation --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.contract' operation. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Utils/VectorUtils.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/VectorInterfaces.h" +#include "mlir/Support/LogicalResult.h" + +#define DEBUG_TYPE "vector-contract-lowering" + +using namespace mlir; +using namespace mlir::vector; + +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +// Helper to find an index in an affine map. +static std::optional getResultIndex(AffineMap map, int64_t index) { + for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { + int64_t idx = map.getDimPosition(i); + if (idx == index) + return i; + } + return std::nullopt; +} + +// Helper to construct iterator types with one index removed. +static SmallVector adjustIter(ArrayAttr iteratorTypes, + int64_t index) { + SmallVector results; + for (const auto &it : llvm::enumerate(iteratorTypes)) { + int64_t idx = it.index(); + if (idx == index) + continue; + results.push_back(it.value()); + } + return results; +} + +// Helper to construct an affine map with one index removed. 
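+// For example (a made-up map, not taken from a test): removing index 1 from
+//   (d0, d1, d2) -> (d1, d2)
+// drops the d1 result and renumbers the remaining dimension, yielding
+//   (d0, d1) -> (d1)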
+static AffineMap adjustMap(AffineMap map, int64_t index, + PatternRewriter &rewriter) { + auto *ctx = rewriter.getContext(); + SmallVector results; + for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { + int64_t idx = map.getDimPosition(i); + if (idx == index) + continue; + // Re-insert remaining indices, but renamed when occurring + // after the removed index. + auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx); + results.push_back(targetExpr); + } + return AffineMap::get(map.getNumDims() - 1, 0, results, ctx); +} + +// Helper method to possibly drop a dimension in a load. +// TODO +static Value reshapeLoad(Location loc, Value val, VectorType type, + int64_t index, int64_t pos, + PatternRewriter &rewriter) { + if (index == -1) + return val; + Type lowType = VectorType::Builder(type).dropDim(0); + // At extraction dimension? + if (index == 0) { + auto posAttr = rewriter.getI64ArrayAttr(pos); + return rewriter.create(loc, lowType, val, posAttr); + } + // Unroll leading dimensions. + VectorType vType = lowType.cast(); + Type resType = VectorType::Builder(type).dropDim(index); + auto resVectorType = resType.cast(); + Value result = rewriter.create( + loc, resVectorType, rewriter.getZeroAttr(resVectorType)); + for (int64_t d = 0, e = resVectorType.getDimSize(0); d < e; d++) { + auto posAttr = rewriter.getI64ArrayAttr(d); + Value ext = rewriter.create(loc, vType, val, posAttr); + Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter); + result = rewriter.create(loc, resVectorType, load, result, + posAttr); + } + return result; +} + +// Helper method to possibly drop a dimension in a store. +// TODO +static Value reshapeStore(Location loc, Value val, Value result, + VectorType type, int64_t index, int64_t pos, + PatternRewriter &rewriter) { + // Unmodified? + if (index == -1) + return val; + // At insertion dimension? + if (index == 0) { + auto posAttr = rewriter.getI64ArrayAttr(pos); + return rewriter.create(loc, type, val, result, posAttr); + } + // Unroll leading dimensions. + Type lowType = VectorType::Builder(type).dropDim(0); + VectorType vType = lowType.cast(); + Type insType = VectorType::Builder(vType).dropDim(0); + for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) { + auto posAttr = rewriter.getI64ArrayAttr(d); + Value ext = rewriter.create(loc, vType, result, posAttr); + Value ins = rewriter.create(loc, insType, val, posAttr); + Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter); + result = rewriter.create(loc, type, sto, result, posAttr); + } + return result; +} + +/// Helper to create arithmetic operation associated with a kind of contraction. +static std::optional +createContractArithOp(Location loc, Value x, Value y, Value acc, + vector::CombiningKind kind, PatternRewriter &rewriter, + bool isInt, Value mask = Value()) { + using vector::CombiningKind; + Value mul; + + if (isInt) { + if (kind == CombiningKind::MINF || kind == CombiningKind::MAXF) + // Only valid for floating point types. + return std::nullopt; + mul = rewriter.create(loc, x, y); + } else { + // Float case. + if (kind == CombiningKind::AND || kind == CombiningKind::MINUI || + kind == CombiningKind::MINSI || kind == CombiningKind::MAXUI || + kind == CombiningKind::MAXSI || kind == CombiningKind::OR || + kind == CombiningKind::XOR) + // Only valid for integer types. + return std::nullopt; + // Special case for fused multiply-add. 
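+    // With a vector accumulator and an ADD combining kind, the multiply and
+    // accumulate are fused into a single vector.fma instead of an arith.mulf
+    // followed by a separate add/reduction.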
+ if (acc && acc.getType().isa() && kind == CombiningKind::ADD) { + Value fma = rewriter.create(loc, x, y, acc); + if (mask) + // The fma op doesn't need explicit masking. However, fma ops used in + // reductions must preserve previous 'acc' values for masked-out lanes. + fma = selectPassthru(rewriter, mask, fma, acc); + return fma; + } + mul = rewriter.create(loc, x, y); + } + + if (!acc) + return std::optional(mul); + + return makeArithReduction(rewriter, loc, kind, mul, acc, mask); +} + +/// Return the positions of the reductions in the given map. +static SmallVector getReductionIndex(AffineMap map, + ArrayAttr iteratorTypes) { + SmallVector dimsIdx; + for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { + if (isReductionIterator(iteratorTypes[map.getDimPosition(i)])) + dimsIdx.push_back(i); + } + return dimsIdx; +} + +/// Look for a given dimension in an affine map and return its position. Return +/// std::nullopt if the dimension is not in the map results. +static std::optional getDimPosition(AffineMap map, unsigned dim) { + for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { + if (map.getDimPosition(i) == dim) + return i; + } + return std::nullopt; +} + +/// Creates an AddIOp if `isInt` is true otherwise create an arith::AddFOp using +/// operands `x` and `y`. +static Value createAdd(Location loc, Value x, Value y, bool isInt, + PatternRewriter &rewriter) { + if (isInt) + return rewriter.create(loc, x, y); + return rewriter.create(loc, x, y); +} + +/// Creates a MulIOp if `isInt` is true otherwise create an MulFOp using +/// operands `x and `y`. +static Value createMul(Location loc, Value x, Value y, bool isInt, + PatternRewriter &rewriter) { + if (isInt) + return rewriter.create(loc, x, y); + return rewriter.create(loc, x, y); +} + +namespace { + +/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to: +/// ``` +/// %flattened_a = vector.shape_cast %a +/// %flattened_b = vector.shape_cast %b +/// %flattened_d = vector.matmul %flattened_a, %flattened_b +/// %d = vector.shape_cast %%flattened_d +/// %e = add %c, %d +/// ``` +/// `vector.matmul` later lowers to `llvm.matrix.multiply`. +// +/// This only kicks in when VectorTransformsOptions is set to OuterProduct and +/// the vector.contract op is a row-major matrix multiply. +class ContractionOpToMatmulOpLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + using FilterConstraintType = + std::function; + + static LogicalResult defaultFilter(vector::ContractionOp op) { + return success(); + } + + ContractionOpToMatmulOpLowering( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1, + FilterConstraintType constraint = defaultFilter) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions), + filter(std::move(constraint)) {} + + LogicalResult matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; +}; + +/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to a reduction_size-unrolled sequence: +/// ``` +/// %at = vector.transpose %a, [1, 0] +/// %bRow0 = vector.extract %b[0] +/// %atRow0 = vector.extract %at[0] +/// %c0 = vector.outerproduct %atRow0, %bRow0, %c +/// ... 
+/// %bRowK = vector.extract %b[K] +/// %atRowK = vector.extract %at[K] +/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 +/// ``` +/// +/// This only kicks in when VectorTransformsOptions is set to OuterProduct and +/// the vector.contract op is a row-major matrix multiply. +class ContractionOpToOuterProductOpLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + using FilterConstraintType = + std::function; + + static LogicalResult defaultFilter(vector::ContractionOp op) { + return success(); + } + + ContractionOpToOuterProductOpLowering( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1, + FilterConstraintType constraint = defaultFilter) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions), + filter(std::move(constraint)) {} + + LogicalResult matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; +}; + +/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to an output-size-unrolled sequence: +/// ``` +/// %out = arith.constant ... : vector +/// %bt = vector.transpose %b, [1, 0] +/// %aRow0 = vector.extract %a[0] +/// %btRow0 = vector.extract %bt[0] +/// %c00 = vector.reduce %atRow0, %bRow0 +/// %out00 = vector.insert %c00, %out[0, 0] +/// ... +/// %aRowLast = vector.extract %at[M-1] +/// %btRowLast = vector.extract %b[N-1] +/// %cLastLast = vector.reduce %atRowLast, %bRowLast +/// %outcLastLast = vector.insert %cLastLast, %out[M-1, N-1] +/// ``` +/// +/// This only kicks in when VectorTransformsOptions is set to Dot and +/// the vector.contract op is a row-major matmul or matvec. +class ContractionOpToDotLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + using FilterConstraintType = + std::function; + + static LogicalResult defaultFilter(vector::ContractionOp op) { + return success(); + } + + ContractionOpToDotLowering( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1, + const FilterConstraintType &constraint = defaultFilter) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} + + LogicalResult matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; +}; + +/// Progressive lowering of ContractionOp. +/// +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// .. +/// %x = combine %a %b .. +/// until a pure contraction is reached (no free/batch dimensions), +/// which is replaced by a dot-product. +/// +/// This only kicks in when either VectorTransformsOptions is set +/// to Dot or when other contraction patterns fail. 
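+/// For instance (shapes are illustrative only), a batched matmul
+///   vector<2x3x4xf32> x vector<2x4x5xf32> -> vector<2x3x5xf32>
+/// is first unrolled along the batch dimension into two 3x4 * 4x5
+/// contractions whose results are re-inserted into the accumulator shape, and
+/// each of those is lowered further until only 1-D dot products remain.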
+class ContractionOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + using FilterConstraintType = + std::function; + + static LogicalResult defaultFilter(vector::ContractionOp op) { + return success(); + } + + ContractionOpLowering(vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1, + FilterConstraintType constraint = defaultFilter) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions), + filter(std::move(constraint)) {} + + LogicalResult matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const override; + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; + // Lower one parallel dimension. + FailureOr lowerParallel(PatternRewriter &rewriter, + vector::ContractionOp op, int64_t lhsIndex, + int64_t rhsIndex, Value mask) const; + // Lower one reduction dimension. + FailureOr lowerReduction(PatternRewriter &rewriter, + vector::ContractionOp op, Value mask) const; +}; + +/// Generate a vector implementation for matmat, matvec and tmatvec. +/// This unrolls outer-products along the reduction dimension. +struct UnrolledOuterProductGenerator + : public StructuredGenerator { + UnrolledOuterProductGenerator(RewriterBase &b, vector::ContractionOp op) + : StructuredGenerator(b, op), + kind(op.getKind()), lhs(op.getLhs()), rhs(op.getRhs()), + res(op.getAcc()), lhsType(op.getLhsType()) { + auto maskableOp = cast(op.getOperation()); + if (maskableOp.isMasked()) + mask = maskableOp.getMaskingOp().getMask(); + } + + Value t(Value v, ArrayRef perm = {1, 0}) { + if (!v) + return v; + return rewriter.create(loc, v, perm); + } + + Value promote(Value v, Type dstElementType) { + Type elementType = v.getType(); + auto vecType = elementType.dyn_cast(); + if (vecType) + elementType = vecType.getElementType(); + if (elementType == dstElementType) + return v; + Type promotedType = dstElementType; + if (vecType) + promotedType = VectorType::get(vecType.getShape(), promotedType); + if (dstElementType.isa()) + return rewriter.create(loc, promotedType, v); + return rewriter.create(loc, promotedType, v); + } + + FailureOr outerProd(Value lhs, Value rhs, Value res, int reductionSize, + std::optional maybeMask = std::nullopt) { + assert(reductionSize > 0); + // Incremental support for masking. + if (mask && !maybeMask.has_value()) + return failure(); + + Type resElementType = res.getType().cast().getElementType(); + for (int64_t k = 0; k < reductionSize; ++k) { + Value extractA = rewriter.create(loc, lhs, k); + Value extractB = rewriter.create(loc, rhs, k); + extractA = promote(extractA, resElementType); + extractB = promote(extractB, resElementType); + Value extractMask; + if (maybeMask.has_value() && maybeMask.value()) + extractMask = + rewriter.create(loc, maybeMask.value(), k); + + Operation *outerProdOp = rewriter.create( + loc, res.getType(), extractA, extractB, res, kind); + res = maskOperation(rewriter, outerProdOp, extractMask)->getResult(0); + } + return res; + } + + /// Two outer parallel, one inner reduction (matmat flavor). + FailureOr matmat() { + if (!iters({Par(), Par(), Red()})) + return failure(); + // Set up the parallel/reduction structure in the right form. + AffineExpr m, n, k; + bindDims(rewriter.getContext(), m, n, k); + // Classical row-major matmul: Just permute the lhs. 
+ if (layout({{m, k}, {k, n}, {m, n}})) + return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), + t(mask, {2, 0, 1})); + // TODO: may be better to fail and use some vector -> scalar reduction. + if (layout({{m, k}, {n, k}, {m, n}})) { + Value tlhs = t(lhs); + return outerProd(tlhs, t(rhs), res, lhsType.getDimSize(1)); + } + // No need to permute anything. + if (layout({{k, m}, {k, n}, {m, n}})) + return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); + // Just permute the rhs. + if (layout({{k, m}, {n, k}, {m, n}})) + return outerProd(lhs, t(rhs), res, lhsType.getDimSize(0)); + // Transposed output: swap RHS and LHS. + // Classical row-major matmul: permute the lhs. + if (layout({{m, k}, {k, n}, {n, m}})) + return outerProd(rhs, t(lhs), res, lhsType.getDimSize(1)); + // TODO: may be better to fail and use some vector -> scalar reduction. + if (layout({{m, k}, {n, k}, {n, m}})) { + Value trhs = t(rhs); + return outerProd(trhs, t(lhs), res, lhsType.getDimSize(1)); + } + if (layout({{k, m}, {k, n}, {n, m}})) + return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); + if (layout({{k, m}, {n, k}, {n, m}})) + return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); + return failure(); + } + + /// One outer parallel, one inner reduction (matvec flavor) + FailureOr matvec() { + if (!iters({Par(), Red()})) + return failure(); + AffineExpr m, k; + bindDims(rewriter.getContext(), m, k); + + // Case mat-vec: transpose. + if (layout({{m, k}, {k}, {m}})) + return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), t(mask)); + // Case mat-trans-vec: ready to go. + if (layout({{k, m}, {k}, {m}})) + return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); + // Case vec-mat: swap and transpose. + if (layout({{k}, {m, k}, {m}})) + return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); + // Case vec-mat-trans: swap and ready to go. + if (layout({{k}, {k, m}, {m}})) + return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); + return failure(); + } + + // + // One outer reduction, one inner parallel (tmatvec flavor) + // + FailureOr tmatvec() { + if (!iters({Red(), Par()})) + return failure(); + AffineExpr k, m; + bindDims(rewriter.getContext(), k, m); + + // Case mat-vec: transpose. + if (layout({{m, k}, {k}, {m}})) + return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1)); + // Case mat-trans-vec: ready to go. + if (layout({{k, m}, {k}, {m}})) + return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); + // Case vec-mat: swap and transpose. + if (layout({{k}, {m, k}, {m}})) + return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); + // Case vec-mat-trans: swap and ready to go. + if (layout({{k}, {k, m}, {m}})) + return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); + return failure(); + } + +private: + vector::CombiningKind kind; + Value lhs, rhs, res, mask; + VectorType lhsType; +}; + +/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to a reduction_size-unrolled sequence: +/// ``` +/// %at = vector.transpose %a, [1, 0] +/// %bRow0 = vector.extract %b[0] +/// %atRow0 = vector.extract %at[0] +/// %c0 = vector.outerproduct %atRow0, %bRow0, %c +/// ... +/// %bRowK = vector.extract %b[K] +/// %atRowK = vector.extract %at[K] +/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 +/// ``` +/// +/// This only kicks in when VectorTransformsOptions is set to OuterProduct but +/// otherwise supports any layout permutation of the matrix-multiply. 
+LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite( + vector::ContractionOp op, PatternRewriter &rewriter) const { + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::OuterProduct) + return failure(); + + if (failed(filter(op))) + return failure(); + + // Vector mask setup. + OpBuilder::InsertionGuard guard(rewriter); + auto maskableOp = cast(op.getOperation()); + Operation *rootOp; + if (maskableOp.isMasked()) { + rewriter.setInsertionPoint(maskableOp.getMaskingOp()); + rootOp = maskableOp.getMaskingOp(); + } else { + rootOp = op; + } + + UnrolledOuterProductGenerator e(rewriter, op); + FailureOr matmatRes = e.matmat(); + if (succeeded(matmatRes)) { + rewriter.replaceOp(rootOp, *matmatRes); + return success(); + } + FailureOr matvecRes = e.matvec(); + if (succeeded(matvecRes)) { + rewriter.replaceOp(rootOp, *matvecRes); + return success(); + } + FailureOr tmatvecRes = e.tmatvec(); + if (succeeded(tmatvecRes)) { + rewriter.replaceOp(rootOp, *tmatvecRes); + return success(); + } + + return failure(); +} + +LogicalResult +ContractionOpToDotLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + // TODO: Support vector.mask. + auto maskableOp = cast(op.getOperation()); + if (maskableOp.isMasked()) + return failure(); + + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + + if (failed(filter(op))) + return failure(); + + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::Dot) + return failure(); + + auto iteratorTypes = op.getIteratorTypes().getValue(); + static constexpr std::array perm = {1, 0}; + Location loc = op.getLoc(); + Value lhs = op.getLhs(), rhs = op.getRhs(); + + using MapList = ArrayRef>; + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + AffineExpr m, n, k; + bindDims(rewriter.getContext(), m, n, k); + SmallVector maps = op.getIndexingMapsArray(); + // + // In the following we wish to make the reduction dimension innermost so we + // can load vectors and just fmul + reduce into a scalar. + // + if (isParallelIterator(iteratorTypes[0]) && + isParallelIterator(iteratorTypes[1]) && + isReductionIterator(iteratorTypes[2])) { + // + // Two outer parallel, one inner reduction (matmat flavor). + // + if (maps == infer({{m, k}, {k, n}, {m, n}})) { + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { + // No need to permute anything. + } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { + lhs = rewriter.create(loc, lhs, perm); + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { + // This is the classical row-major matmul. Just permute the lhs. 
+ Value tmp = lhs; + lhs = rewriter.create(loc, rhs, perm); + rhs = tmp; + } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { + std::swap(lhs, rhs); + } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { + Value tmp = lhs; + lhs = rewriter.create(loc, rhs, perm); + rhs = rewriter.create(loc, tmp, perm); + } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { + Value tmp = rhs; + rhs = rewriter.create(loc, lhs, perm); + lhs = tmp; + } else { + return failure(); + } + } else if (isParallelIterator(iteratorTypes[0]) && + isReductionIterator(iteratorTypes[1])) { + // + // One outer parallel, one inner reduction (matvec flavor) + // + if (maps == infer({{m, n}, {n}, {m}})) { + // No need to permute anything. + } else if (maps == infer({{n, m}, {n}, {m}})) { + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{n}, {m, n}, {m}})) { + std::swap(lhs, rhs); + } else if (maps == infer({{n}, {n, m}, {m}})) { + std::swap(lhs, rhs); + lhs = rewriter.create(loc, lhs, perm); + } else { + return failure(); + } + } else { + return failure(); + } + + VectorType dstType = op.getResultType().cast(); + assert(dstType.getRank() >= 1 && dstType.getRank() <= 2 && + "Expected dst type of rank 1 or 2"); + + unsigned rank = dstType.getRank(); + unsigned dstRows = dstType.getShape()[0]; + unsigned dstColumns = rank == 1 ? 1 : dstType.getShape()[1]; + + // ExtractOp does not allow dynamic indexing, we must unroll explicitly. + Value res = rewriter.create(loc, dstType, + rewriter.getZeroAttr(dstType)); + bool isInt = dstType.getElementType().isa(); + for (unsigned r = 0; r < dstRows; ++r) { + Value a = rewriter.create(op.getLoc(), lhs, r); + for (unsigned c = 0; c < dstColumns; ++c) { + Value b = rank == 1 + ? rhs + : rewriter.create(op.getLoc(), rhs, c); + Value m = createMul(op.getLoc(), a, b, isInt, rewriter); + Value reduced = rewriter.create( + op.getLoc(), vector::CombiningKind::ADD, m); + + SmallVector pos = rank == 1 ? SmallVector{r} + : SmallVector{r, c}; + res = rewriter.create(op.getLoc(), reduced, res, pos); + } + } + if (auto acc = op.getAcc()) + res = createAdd(op.getLoc(), res, acc, isInt, rewriter); + rewriter.replaceOp(op, res); + return success(); +} + +/// Lower vector.contract with all size one reduction dimensions to +/// elementwise ops when possible. +struct ContractOpToElementwise + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + using FilterConstraintType = + std::function; + static LogicalResult defaultFilter(vector::ContractionOp op) { + return success(); + } + ContractOpToElementwise( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1, + const FilterConstraintType &constraint = defaultFilter) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} + + LogicalResult matchAndRewrite(vector::ContractionOp contractOp, + PatternRewriter &rewriter) const override { + // TODO: Support vector.mask. + auto maskableOp = cast(contractOp.getOperation()); + if (maskableOp.isMasked()) + return failure(); + + // TODO: Remove native masks from contraction op? 
+ if (!contractOp.getMasks().empty()) + return failure(); + + if (failed(filter(contractOp))) + return failure(); + + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::ParallelArith) + return failure(); + + ArrayRef lhsShape = contractOp.getLhsType().getShape(); + ArrayRef rhsShape = contractOp.getRhsType().getShape(); + AffineMap lhsMap = contractOp.getIndexingMapsArray()[0]; + AffineMap rhsMap = contractOp.getIndexingMapsArray()[1]; + SmallVector lhsReductionDims = + getReductionIndex(lhsMap, contractOp.getIteratorTypes()); + SmallVector rhsReductionDims = + getReductionIndex(rhsMap, contractOp.getIteratorTypes()); + // All the reduction dimensions must be a size 1. + for (int64_t dim : lhsReductionDims) { + if (lhsShape[dim] != 1) + return failure(); + } + for (int64_t dim : rhsReductionDims) { + if (rhsShape[dim] != 1) + return failure(); + } + AffineMap accMap = contractOp.getIndexingMapsArray()[2]; + unsigned numParallelDims = accMap.getNumResults(); + unsigned numLhsDimToBroadcast = + numParallelDims - (lhsMap.getNumResults() - lhsReductionDims.size()); + unsigned numRhsDimToBroadcast = + numParallelDims - (rhsMap.getNumResults() - rhsReductionDims.size()); + SmallVector lhsDims; + SmallVector lhsTranspose; + SmallVector rhsDims; + SmallVector rhsTranspose; + for (int64_t dim : lhsReductionDims) + lhsTranspose.push_back(numLhsDimToBroadcast + dim); + for (int64_t dim : rhsReductionDims) + rhsTranspose.push_back(numRhsDimToBroadcast + dim); + // Loop through the parallel dimensions to calculate the dimensions to + // broadcast and to permute in order to extract only parallel dimensions. + for (unsigned i = 0; i < numParallelDims; i++) { + std::optional lhsDim = + getDimPosition(lhsMap, accMap.getDimPosition(i)); + if (lhsDim) { + lhsTranspose.push_back(numLhsDimToBroadcast + *lhsDim); + } else { + // If the parallel dimension doesn't exist we will have to broadcast it. + lhsDims.push_back( + contractOp.getResultType().cast().getDimSize(i)); + lhsTranspose.push_back(lhsDims.size() - 1); + } + std::optional rhsDim = + getDimPosition(rhsMap, accMap.getDimPosition(i)); + if (rhsDim) { + rhsTranspose.push_back(numRhsDimToBroadcast + *rhsDim); + } else { + // If the parallel dimension doesn't exist we will have to broadcast it. 
+ rhsDims.push_back( + contractOp.getResultType().cast().getDimSize(i)); + rhsTranspose.push_back(rhsDims.size() - 1); + } + } + Value newLhs = contractOp.getLhs(); + Value newRhs = contractOp.getRhs(); + Location loc = contractOp.getLoc(); + if (!lhsDims.empty()) { + lhsDims.append(lhsShape.begin(), lhsShape.end()); + auto expandedType = + VectorType::get(lhsDims, contractOp.getLhsType().getElementType()); + newLhs = rewriter.create(loc, expandedType, newLhs); + } + if (!rhsDims.empty()) { + rhsDims.append(rhsShape.begin(), rhsShape.end()); + auto expandedType = + VectorType::get(rhsDims, contractOp.getRhsType().getElementType()); + newRhs = rewriter.create(loc, expandedType, newRhs); + } + bool isInt = contractOp.getLhsType().getElementType().isIntOrIndex(); + newLhs = rewriter.create(loc, newLhs, lhsTranspose); + newRhs = rewriter.create(loc, newRhs, rhsTranspose); + SmallVector lhsOffsets(lhsReductionDims.size(), 0); + SmallVector rhsOffsets(rhsReductionDims.size(), 0); + newLhs = rewriter.create( + loc, newLhs, rewriter.getI64ArrayAttr(lhsOffsets)); + newRhs = rewriter.create( + loc, newRhs, rewriter.getI64ArrayAttr(rhsOffsets)); + std::optional result = + createContractArithOp(loc, newLhs, newRhs, contractOp.getAcc(), + contractOp.getKind(), rewriter, isInt); + rewriter.replaceOp(contractOp, {*result}); + return success(); + } + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; +}; + +/// Progressive lowering of ContractionOp. +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// .. +/// %x = combine %a %b .. +/// until a pure contraction is reached (no free/batch dimensions), +/// which is replaced by a dot-product. +/// +/// This only kicks in when either VectorTransformsOptions is set +/// to DOT or when other contraction patterns fail. +// +// TODO: break down into transpose/reshape/cast ops +// when they become available to avoid code dup +// TODO: investigate lowering order impact on performance +LogicalResult +ContractionOpLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + + if (failed(filter(op))) + return failure(); + + // TODO: support mixed mode contract lowering. + if (op.getLhsType().getElementType() != + getElementTypeOrSelf(op.getAccType()) || + op.getRhsType().getElementType() != getElementTypeOrSelf(op.getAccType())) + return failure(); + + // TODO: the code below assumes the default contraction, make sure it supports + // other kinds before enabling this lowering. + if (op.getKind() != vector::CombiningKind::ADD) { + return rewriter.notifyMatchFailure( + op, "contractions other than 'add' not supported"); + } + + // TODO: implement benefits, cost models. 
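+  // Try the specialized lowerings (matmul intrinsic, outer product, dot,
+  // elementwise) first; if none applies, fall back to progressively unrolling
+  // batch, free and reduction dimensions below.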
+ MLIRContext *ctx = op.getContext(); + ContractionOpToMatmulOpLowering pat1(vectorTransformOptions, ctx); + if (succeeded(pat1.matchAndRewrite(op, rewriter))) + return success(); + ContractionOpToOuterProductOpLowering pat2(vectorTransformOptions, ctx); + if (succeeded(pat2.matchAndRewrite(op, rewriter))) + return success(); + ContractionOpToDotLowering pat3(vectorTransformOptions, ctx); + if (succeeded(pat3.matchAndRewrite(op, rewriter))) + return success(); + ContractOpToElementwise pat4(vectorTransformOptions, ctx); + if (succeeded(pat4.matchAndRewrite(op, rewriter))) + return success(); + + // Vector mask setup. + OpBuilder::InsertionGuard guard(rewriter); + Operation *rootOp = op; + Value mask; + if (op.isMasked()) { + rewriter.setInsertionPoint(op.getMaskingOp()); + rootOp = op.getMaskingOp(); + mask = op.getMaskingOp().getMask(); + } + + // Find first batch dimension in LHS/RHS, and lower when found. + std::vector> batchDimMap = op.getBatchDimMap(); + if (!batchDimMap.empty()) { + int64_t lhsIndex = batchDimMap[0].first; + int64_t rhsIndex = batchDimMap[0].second; + auto newOp = lowerParallel(rewriter, op, lhsIndex, rhsIndex, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + + // Collect contracting dimensions. + std::vector> contractingDimMap = + op.getContractingDimMap(); + DenseSet lhsContractingDimSet; + DenseSet rhsContractingDimSet; + for (auto &dimPair : contractingDimMap) { + lhsContractingDimSet.insert(dimPair.first); + rhsContractingDimSet.insert(dimPair.second); + } + + // Find first free dimension in LHS, and lower when found. + VectorType lhsType = op.getLhsType(); + for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e; ++lhsIndex) { + if (lhsContractingDimSet.count(lhsIndex) == 0) { + auto newOp = lowerParallel(rewriter, op, lhsIndex, /*rhsIndex=*/-1, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + } + + // Find first free dimension in RHS, and lower when found. + VectorType rhsType = op.getRhsType(); + for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e; ++rhsIndex) { + if (rhsContractingDimSet.count(rhsIndex) == 0) { + auto newOp = lowerParallel(rewriter, op, /*lhsIndex=*/-1, rhsIndex, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + } + + // Lower the first remaining reduction dimension. + if (!contractingDimMap.empty()) { + auto newOp = lowerReduction(rewriter, op, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + + return failure(); +} + +// Lower one parallel dimension. +// Incidentally also tolerates unit-size (hence trivial) reduction dimensions. +// TODO: consider reusing existing contract unrolling +FailureOr ContractionOpLowering::lowerParallel(PatternRewriter &rewriter, + vector::ContractionOp op, + int64_t lhsIndex, + int64_t rhsIndex, + Value mask) const { + VectorType lhsType = op.getLhsType(); + VectorType rhsType = op.getRhsType(); + VectorType resType = op.getResultType().cast(); + // Find the iterator type index and result index. 
+ SmallVector iMap = op.getIndexingMapsArray(); + int64_t iterIndex = -1; + int64_t dimSize = -1; + if (lhsIndex >= 0) { + iterIndex = iMap[0].getDimPosition(lhsIndex); + if (rhsIndex >= 0 && iterIndex != iMap[1].getDimPosition(rhsIndex)) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected lhsIndex=" << lhsIndex << " and rhsIndex=" << rhsIndex + << " to map to the same dimension"; + }); + dimSize = lhsType.getDimSize(lhsIndex); + } else if (rhsIndex >= 0) { + iterIndex = iMap[1].getDimPosition(rhsIndex); + dimSize = rhsType.getDimSize(rhsIndex); + } + if (iterIndex < 0) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected either lhsIndex=" << lhsIndex + << " or rhsIndex=" << rhsIndex << " to be nonnegative"; + }); + // value_or(-1) means that we tolerate a dimension not appearing + // in the result map. That can't happen for actual parallel iterators, but + // the caller ContractionOpLowering::matchAndRewrite is currently calling + // lowerParallel also for the case of unit-size reduction dims appearing only + // on one of LHS or RHS, not both. At the moment, such cases are created by + // CastAwayContractionLeadingOneDim, so we need to either support that or + // modify that pattern. + int64_t resIndex = getResultIndex(iMap[2], iterIndex).value_or(-1); + if (resIndex == -1 && dimSize != 1) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected the dimension for iterIndex=" << iterIndex + << " to either appear in the result map, or to be a unit dimension"; + }); + + // Construct new iterator types and affine map array attribute. + std::array lowIndexingMaps = { + adjustMap(iMap[0], iterIndex, rewriter), + adjustMap(iMap[1], iterIndex, rewriter), + adjustMap(iMap[2], iterIndex, rewriter)}; + auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); + auto lowIter = + rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); + // Unroll into a series of lower dimensional vector.contract ops. + Location loc = op.getLoc(); + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + + for (int64_t d = 0; d < dimSize; ++d) { + auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); + auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); + auto acc = reshapeLoad(loc, op.getAcc(), resType, resIndex, d, rewriter); + + Value lowMask; + if (mask) + lowMask = reshapeLoad(loc, mask, cast(mask.getType()), + iterIndex, d, rewriter); + + Operation *lowContract = rewriter.create( + loc, lhs, rhs, acc, lowAffine, lowIter); + lowContract = maskOperation(rewriter, lowContract, lowMask); + result = reshapeStore(loc, lowContract->getResult(0), result, resType, + resIndex, d, rewriter); + } + return result; +} + +// Lower one reduction dimension. +FailureOr ContractionOpLowering::lowerReduction( + PatternRewriter &rewriter, vector::ContractionOp op, Value mask) const { + auto loc = op.getLoc(); + VectorType lhsType = op.getLhsType(); + VectorType rhsType = op.getRhsType(); + Type resType = op.getResultType(); + if (resType.isa()) + return rewriter.notifyMatchFailure(op, + "did not expect a VectorType result"); + bool isInt = resType.isa(); + // Use iterator index 0. 
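+  // By the time lowerReduction is reached, all free/batch dimensions have
+  // already been lowered away, so every remaining iterator, and in particular
+  // dimension 0, is a reduction dimension.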
+ int64_t iterIndex = 0; + SmallVector iMap = op.getIndexingMapsArray(); + std::optional lookupLhs = getResultIndex(iMap[0], iterIndex); + std::optional lookupRhs = getResultIndex(iMap[1], iterIndex); + if (!lookupLhs.has_value()) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected iterIndex=" << iterIndex << "to map to a LHS dimension"; + }); + if (!lookupRhs.has_value()) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected iterIndex=" << iterIndex << "to map to a RHS dimension"; + }); + int64_t lhsIndex = *lookupLhs; + int64_t rhsIndex = *lookupRhs; + int64_t dimSize = lhsType.getDimSize(lhsIndex); + if (dimSize != rhsType.getDimSize(rhsIndex)) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expect LHS dimension " << lhsIndex + << " to have the same size as RHS dimension " << rhsIndex; + }); + // Base case. + if (lhsType.getRank() == 1) { + if (rhsType.getRank() != 1) + return rewriter.notifyMatchFailure( + op, "When LHS has rank 1, expected also RHS to have rank 1"); + Value m = createMul(loc, op.getLhs(), op.getRhs(), isInt, rewriter); + auto kind = vector::CombiningKind::ADD; + + Value acc = op.getAcc(); + Operation *reductionOp = + acc ? rewriter.create(loc, kind, m, acc) + : rewriter.create(loc, kind, m); + return maskOperation(rewriter, reductionOp, mask)->getResult(0); + } + // Construct new iterator types and affine map array attribute. + std::array lowIndexingMaps = { + adjustMap(iMap[0], iterIndex, rewriter), + adjustMap(iMap[1], iterIndex, rewriter), + adjustMap(iMap[2], iterIndex, rewriter)}; + auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); + auto lowIter = + rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); + // Unroll into a series of lower dimensional vector.contract ops. + // By feeding the initial accumulator into the first contraction, + // and the result of each contraction into the next, eventually + // the sum of all reductions is computed. + Value result = op.getAcc(); + for (int64_t d = 0; d < dimSize; ++d) { + auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); + auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); + Value newMask; + if (mask) + newMask = reshapeLoad(loc, mask, cast(mask.getType()), + iterIndex, d, rewriter); + + Operation *newContract = rewriter.create( + loc, lhs, rhs, result, lowAffine, lowIter); + result = maskOperation(rewriter, newContract, newMask)->getResult(0); + } + return result; +} + +/// Progressive lowering of OuterProductOp. +/// One: +/// %x = vector.outerproduct %lhs, %rhs, %acc +/// is replaced by: +/// %z = zero-result +/// %0 = vector.extract %lhs[0] +/// %1 = vector.broadcast %0 +/// %2 = vector.extract %acc[0] +/// %3 = vector.fma %1, %rhs, %2 +/// %4 = vector.insert %3, %z[0] +/// .. +/// %x = vector.insert %.., %..[N-1] +/// +class OuterProductOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::OuterProductOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + + VectorType lhsType = op.getOperandVectorTypeLHS(); + VectorType rhsType = op.getOperandTypeRHS().dyn_cast(); + VectorType resType = op.getResultVectorType(); + Type eltType = resType.getElementType(); + bool isInt = eltType.isa(); + Value acc = (op.getAcc().empty()) ? nullptr : op.getAcc()[0]; + vector::CombiningKind kind = op.getKind(); + + // Vector mask setup. 
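+    // If the outer product is wrapped in a vector.mask op, rewrite and replace
+    // that masking op instead of the outerproduct itself, and thread the mask
+    // into the arithmetic generated for each extracted row below.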
+ OpBuilder::InsertionGuard guard(rewriter); + auto maskableOp = cast(op.getOperation()); + Operation *rootOp; + Value mask; + if (maskableOp.isMasked()) { + rewriter.setInsertionPoint(maskableOp.getMaskingOp()); + rootOp = maskableOp.getMaskingOp(); + mask = maskableOp.getMaskingOp().getMask(); + } else { + rootOp = op; + } + + if (!rhsType) { + // Special case: AXPY operation. + Value b = rewriter.create(loc, lhsType, op.getRhs()); + std::optional mult = createContractArithOp( + loc, op.getLhs(), b, acc, kind, rewriter, isInt, mask); + if (!mult.has_value()) + return failure(); + rewriter.replaceOp(rootOp, *mult); + return success(); + } + + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + for (int64_t d = 0, e = resType.getDimSize(0); d < e; ++d) { + auto pos = rewriter.getI64ArrayAttr(d); + Value x = rewriter.create(loc, op.getLhs(), pos); + Value a = rewriter.create(loc, rhsType, x); + Value r = nullptr; + if (acc) + r = rewriter.create(loc, acc, pos); + Value extrMask; + if (mask) + extrMask = rewriter.create(loc, mask, pos); + + std::optional m = createContractArithOp( + loc, a, op.getRhs(), r, kind, rewriter, isInt, extrMask); + if (!m.has_value()) + return failure(); + result = rewriter.create(loc, resType, *m, result, pos); + } + + rewriter.replaceOp(rootOp, result); + return success(); + } +}; + +/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to: +/// ``` +/// %mta = maybe_transpose +/// %mtb = maybe_transpose +/// %flattened_a = vector.shape_cast %mta +/// %flattened_b = vector.shape_cast %mtb +/// %flattened_d = vector.matmul %flattened_a, %flattened_b +/// %mtd = vector.shape_cast %flattened_d +/// %d = maybe_untranspose %mtd +/// %e = add %c, %d +/// ``` +/// `vector.matmul` later lowers to `llvm.matrix.multiply`. +// +/// This only kicks in when VectorTransformsOptions is set to `Matmul`. +/// vector.transpose operations are inserted if the vector.contract op is not a +/// row-major matrix multiply. +LogicalResult +ContractionOpToMatmulOpLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rew) const { + // TODO: Support vector.mask. + auto maskableOp = cast(op.getOperation()); + if (maskableOp.isMasked()) + return failure(); + + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::Matmul) + return failure(); + if (failed(filter(op))) + return failure(); + + auto iteratorTypes = op.getIteratorTypes().getValue(); + if (!isParallelIterator(iteratorTypes[0]) || + !isParallelIterator(iteratorTypes[1]) || + !isReductionIterator(iteratorTypes[2])) + return failure(); + + Type elementType = op.getLhsType().getElementType(); + if (!elementType.isIntOrFloat()) + return failure(); + + Type dstElementType = op.getType(); + if (auto vecType = dstElementType.dyn_cast()) + dstElementType = vecType.getElementType(); + if (elementType != dstElementType) + return failure(); + + // Perform lhs + rhs transpositions to conform to matmul row-major semantics. + // Bail out if the contraction cannot be put in this form. + MLIRContext *ctx = op.getContext(); + Location loc = op.getLoc(); + AffineExpr m, n, k; + bindDims(rew.getContext(), m, n, k); + // LHS must be A(m, k) or A(k, m). 
+  Value lhs = op.getLhs();
+  auto lhsMap = op.getIndexingMapsArray()[0];
+  if (lhsMap == AffineMap::get(3, 0, {k, m}, ctx))
+    lhs = rew.create<vector::TransposeOp>(loc, lhs, ArrayRef<int64_t>{1, 0});
+  else if (lhsMap != AffineMap::get(3, 0, {m, k}, ctx))
+    return failure();
+
+  // RHS must be B(k, n) or B(n, k).
+  Value rhs = op.getRhs();
+  auto rhsMap = op.getIndexingMapsArray()[1];
+  if (rhsMap == AffineMap::get(3, 0, {n, k}, ctx))
+    rhs = rew.create<vector::TransposeOp>(loc, rhs, ArrayRef<int64_t>{1, 0});
+  else if (rhsMap != AffineMap::get(3, 0, {k, n}, ctx))
+    return failure();
+
+  // At this point lhs and rhs are in row-major.
+  VectorType lhsType = lhs.getType().cast<VectorType>();
+  VectorType rhsType = rhs.getType().cast<VectorType>();
+  int64_t lhsRows = lhsType.getDimSize(0);
+  int64_t lhsColumns = lhsType.getDimSize(1);
+  int64_t rhsColumns = rhsType.getDimSize(1);
+
+  Type flattenedLHSType =
+      VectorType::get(lhsType.getNumElements(), lhsType.getElementType());
+  lhs = rew.create<vector::ShapeCastOp>(loc, flattenedLHSType, lhs);
+
+  Type flattenedRHSType =
+      VectorType::get(rhsType.getNumElements(), rhsType.getElementType());
+  rhs = rew.create<vector::ShapeCastOp>(loc, flattenedRHSType, rhs);
+
+  Value mul = rew.create<vector::MatmulOp>(loc, lhs, rhs, lhsRows, lhsColumns,
+                                           rhsColumns);
+  mul = rew.create<vector::ShapeCastOp>(
+      loc,
+      VectorType::get({lhsRows, rhsColumns},
+                      getElementTypeOrSelf(op.getAcc().getType())),
+      mul);
+
+  // ACC must be C(m, n) or C(n, m).
+  auto accMap = op.getIndexingMapsArray()[2];
+  if (accMap == AffineMap::get(3, 0, {n, m}, ctx))
+    mul = rew.create<vector::TransposeOp>(loc, mul, ArrayRef<int64_t>{1, 0});
+  else if (accMap != AffineMap::get(3, 0, {m, n}, ctx))
+    llvm_unreachable("invalid contraction semantics");
+
+  Value res =
+      elementType.isa<FloatType>()
+          ? static_cast<Value>(rew.create<arith::AddFOp>(loc, op.getAcc(), mul))
+          : static_cast<Value>(
+                rew.create<arith::AddIOp>(loc, op.getAcc(), mul));
+
+  rew.replaceOp(op, res);
+  return success();
+}
+} // namespace
+
+void mlir::vector::populateVectorContractLoweringPatterns(
+    RewritePatternSet &patterns, VectorTransformsOptions options,
+    PatternBenefit benefit, bool disableOuterProductLowering) {
+  if (!disableOuterProductLowering)
+    patterns.add<OuterProductOpLowering>(patterns.getContext(), benefit);
+  patterns.add<ContractionOpLowering, ContractionOpToMatmulOpLowering,
+               ContractionOpToOuterProductOpLowering>(
+      options, patterns.getContext(), benefit);
+}
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
new file mode 100644
index 0000000000000..dc10cb6278cb8
--- /dev/null
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
@@ -0,0 +1,173 @@
+//===- LowerVectorGather.cpp - Lower 'vector.gather' operation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target-independent rewrites and utilities to lower the
+// 'vector.gather' operation.
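Before the new gather lowering file, a quick illustration of how the `populateVectorContractLoweringPatterns` entry point defined above is typically driven. This is a minimal sketch, not part of the patch: the include paths, the `root` operation, and the chosen option values are assumptions for illustration; only the populate function, `VectorTransformsOptions`, and `applyPatternsAndFoldGreedily` come from the surrounding code.

```cpp
// Hypothetical driver for the contraction lowering patterns (illustrative).
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" // populate* entry points added by this patch
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"      // applyPatternsAndFoldGreedily

using namespace mlir;

static LogicalResult lowerContractionsToMatmul(Operation *root) {
  // Request the vector.matmul path; other VectorContractLowering values
  // select different strategies handled by the patterns above.
  vector::VectorTransformsOptions options;
  options.vectorContractLowering = vector::VectorContractLowering::Matmul;

  RewritePatternSet patterns(root->getContext());
  vector::populateVectorContractLoweringPatterns(
      patterns, options, /*benefit=*/1, /*disableOuterProductLowering=*/false);
  return applyPatternsAndFoldGreedily(root, std::move(patterns));
}
```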
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Utils/VectorUtils.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/VectorInterfaces.h" +#include "mlir/Support/LogicalResult.h" + +#define DEBUG_TYPE "vector-broadcast-lowering" + +using namespace mlir; +using namespace mlir::vector; + +namespace { +/// Flattens 2 or more dimensional `vector.gather` ops by unrolling the +/// outermost dimension. For example: +/// ``` +/// %g = vector.gather %base[%c0][%v], %mask, %pass_thru : +/// ... into vector<2x3xf32> +/// +/// ==> +/// +/// %0 = arith.constant dense<0.0> : vector<2x3xf32> +/// %g0 = vector.gather %base[%c0][%v0], %mask0, %pass_thru0 : ... +/// %1 = vector.insert %g0, %0 [0] : vector<3xf32> into vector<2x3xf32> +/// %g1 = vector.gather %base[%c0][%v1], %mask1, %pass_thru1 : ... +/// %g = vector.insert %g1, %1 [1] : vector<3xf32> into vector<2x3xf32> +/// ``` +/// +/// When applied exhaustively, this will produce a sequence of 1-d gather ops. +struct FlattenGather : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::GatherOp op, + PatternRewriter &rewriter) const override { + VectorType resultTy = op.getType(); + if (resultTy.getRank() < 2) + return rewriter.notifyMatchFailure(op, "already flat"); + + Location loc = op.getLoc(); + Value indexVec = op.getIndexVec(); + Value maskVec = op.getMask(); + Value passThruVec = op.getPassThru(); + + Value result = rewriter.create( + loc, resultTy, rewriter.getZeroAttr(resultTy)); + + Type subTy = VectorType::get(resultTy.getShape().drop_front(), + resultTy.getElementType()); + + for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) { + int64_t thisIdx[1] = {i}; + + Value indexSubVec = + rewriter.create(loc, indexVec, thisIdx); + Value maskSubVec = + rewriter.create(loc, maskVec, thisIdx); + Value passThruSubVec = + rewriter.create(loc, passThruVec, thisIdx); + Value subGather = rewriter.create( + loc, subTy, op.getBase(), op.getIndices(), indexSubVec, maskSubVec, + passThruSubVec); + result = + rewriter.create(loc, subGather, result, thisIdx); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; + +/// Turns 1-d `vector.gather` into a scalarized sequence of `vector.loads` or +/// `tensor.extract`s. To avoid out-of-bounds memory accesses, these +/// loads/extracts are made conditional using `scf.if` ops. 
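To make the behavior of the pattern defined next concrete, here is a standalone C++ model of the per-element conditional access it emits (plain containers instead of MLIR values; all names are illustrative). Each lane either loads `base[baseOffset + indices[i]]` or keeps its pass-through value, mirroring the per-element `scf.if`.

```cpp
#include <cstddef>
#include <vector>

// Model of a masked 1-D gather: masked-off lanes keep the pass-through value
// and never touch memory, which is why the loads must be made conditional.
std::vector<float> gather1D(const std::vector<float> &base, size_t baseOffset,
                            const std::vector<size_t> &indices,
                            const std::vector<bool> &mask,
                            const std::vector<float> &passThru) {
  std::vector<float> result = passThru;
  for (size_t i = 0; i < indices.size(); ++i)
    if (mask[i])
      result[i] = base[baseOffset + indices[i]];
  return result;
}
```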
+struct Gather1DToConditionalLoads : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::GatherOp op, + PatternRewriter &rewriter) const override { + VectorType resultTy = op.getType(); + if (resultTy.getRank() != 1) + return rewriter.notifyMatchFailure(op, "unsupported rank"); + + Location loc = op.getLoc(); + Type elemTy = resultTy.getElementType(); + // Vector type with a single element. Used to generate `vector.loads`. + VectorType elemVecTy = VectorType::get({1}, elemTy); + + Value condMask = op.getMask(); + Value base = op.getBase(); + Value indexVec = rewriter.createOrFold( + loc, op.getIndexVectorType().clone(rewriter.getIndexType()), + op.getIndexVec()); + auto baseOffsets = llvm::to_vector(op.getIndices()); + Value lastBaseOffset = baseOffsets.back(); + + Value result = op.getPassThru(); + + // Emit a conditional access for each vector element. + for (int64_t i = 0, e = resultTy.getNumElements(); i < e; ++i) { + int64_t thisIdx[1] = {i}; + Value condition = + rewriter.create(loc, condMask, thisIdx); + Value index = rewriter.create(loc, indexVec, thisIdx); + baseOffsets.back() = + rewriter.createOrFold(loc, lastBaseOffset, index); + + auto loadBuilder = [&](OpBuilder &b, Location loc) { + Value extracted; + if (isa(base.getType())) { + // `vector.load` does not support scalar result; emit a vector load + // and extract the single result instead. + Value load = + b.create(loc, elemVecTy, base, baseOffsets); + int64_t zeroIdx[1] = {0}; + extracted = b.create(loc, load, zeroIdx); + } else { + extracted = b.create(loc, base, baseOffsets); + } + + Value newResult = + b.create(loc, extracted, result, thisIdx); + b.create(loc, newResult); + }; + auto passThruBuilder = [result](OpBuilder &b, Location loc) { + b.create(loc, result); + }; + + result = + rewriter + .create(loc, condition, /*thenBuilder=*/loadBuilder, + /*elseBuilder=*/passThruBuilder) + .getResult(0); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorGatherLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add(patterns.getContext(), + benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp index 7c66e65fdef8b..e318d4dc15915 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements target-independent rewrites and utilitites to lower the +// This file implements target-independent rewrites and utilities to lower the // 'vector.mask' operation. 
// //===----------------------------------------------------------------------===// @@ -14,6 +14,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -30,6 +31,147 @@ namespace vector { using namespace mlir; using namespace mlir::vector; +//===----------------------------------------------------------------------===// +// populateVectorMaskOpLoweringPatterns +//===----------------------------------------------------------------------===// + +namespace { +/// Progressive lowering of CreateMaskOp. +/// One: +/// %x = vector.create_mask %a, ... : vector +/// is replaced by: +/// %l = vector.create_mask ... : vector<...> ; one lower rank +/// %0 = arith.cmpi "slt", %ci, %a | +/// %1 = select %0, %l, %zeroes | +/// %r = vector.insert %1, %pr [i] | d-times +/// %x = .... +/// until a one-dimensional vector is reached. +class CreateMaskOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::CreateMaskOp op, + PatternRewriter &rewriter) const override { + auto dstType = op.getResult().getType().cast(); + int64_t rank = dstType.getRank(); + if (rank <= 1) + return rewriter.notifyMatchFailure( + op, "0-D and 1-D vectors are handled separately"); + + auto loc = op.getLoc(); + auto eltType = dstType.getElementType(); + int64_t dim = dstType.getDimSize(0); + Value idx = op.getOperand(0); + + VectorType lowType = + VectorType::get(dstType.getShape().drop_front(), eltType); + Value trueVal = rewriter.create( + loc, lowType, op.getOperands().drop_front()); + Value falseVal = rewriter.create( + loc, lowType, rewriter.getZeroAttr(lowType)); + Value result = rewriter.create( + loc, dstType, rewriter.getZeroAttr(dstType)); + for (int64_t d = 0; d < dim; d++) { + Value bnd = + rewriter.create(loc, rewriter.getIndexAttr(d)); + Value val = rewriter.create(loc, arith::CmpIPredicate::slt, + bnd, idx); + Value sel = rewriter.create(loc, val, trueVal, falseVal); + auto pos = rewriter.getI64ArrayAttr(d); + result = + rewriter.create(loc, dstType, sel, result, pos); + } + rewriter.replaceOp(op, result); + return success(); + } +}; + +/// Progressive lowering of ConstantMaskOp. +/// One: +/// %x = vector.constant_mask [a,b] +/// is replaced by: +/// %z = zero-result +/// %l = vector.constant_mask [b] +/// %4 = vector.insert %l, %z[0] +/// .. +/// %x = vector.insert %l, %..[a-1] +/// until a one-dimensional vector is reached. All these operations +/// will be folded at LLVM IR level. 
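Before the class itself, a standalone C++ model of the 1-D base case described above (plain containers, illustrative): the first `min(trueDim, dimSize)` lanes of a constant mask are true and the rest are false, which is the `[T,..,T,F,..,F]` form the pattern materializes directly.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Model of the 1-D vector.constant_mask lowering: a leading run of true lanes.
std::vector<bool> constantMask1D(int64_t dimSize, int64_t trueDim) {
  std::vector<bool> values(static_cast<size_t>(dimSize), false);
  std::fill_n(values.begin(), std::min(trueDim, dimSize), true);
  return values;
}
```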
+class ConstantMaskOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ConstantMaskOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + auto dstType = op.getType(); + auto eltType = dstType.getElementType(); + auto dimSizes = op.getMaskDimSizes(); + int64_t rank = dstType.getRank(); + + if (rank == 0) { + assert(dimSizes.size() == 1 && + "Expected exactly one dim size for a 0-D vector"); + bool value = dimSizes[0].cast().getInt() == 1; + rewriter.replaceOpWithNewOp( + op, dstType, + DenseIntElementsAttr::get( + VectorType::get(ArrayRef{}, rewriter.getI1Type()), + ArrayRef{value})); + return success(); + } + + // Scalable constant masks can only be lowered for the "none set" case. + if (dstType.cast().isScalable()) { + rewriter.replaceOpWithNewOp( + op, DenseElementsAttr::get(dstType, false)); + return success(); + } + + int64_t trueDim = std::min(dstType.getDimSize(0), + dimSizes[0].cast().getInt()); + + if (rank == 1) { + // Express constant 1-D case in explicit vector form: + // [T,..,T,F,..,F]. + SmallVector values(dstType.getDimSize(0)); + for (int64_t d = 0; d < trueDim; d++) + values[d] = true; + rewriter.replaceOpWithNewOp( + op, dstType, rewriter.getBoolVectorAttr(values)); + return success(); + } + + VectorType lowType = + VectorType::get(dstType.getShape().drop_front(), eltType); + SmallVector newDimSizes; + for (int64_t r = 1; r < rank; r++) + newDimSizes.push_back(dimSizes[r].cast().getInt()); + Value trueVal = rewriter.create( + loc, lowType, rewriter.getI64ArrayAttr(newDimSizes)); + Value result = rewriter.create( + loc, dstType, rewriter.getZeroAttr(dstType)); + for (int64_t d = 0; d < trueDim; d++) { + auto pos = rewriter.getI64ArrayAttr(d); + result = + rewriter.create(loc, dstType, trueVal, result, pos); + } + rewriter.replaceOp(op, result); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorMaskOpLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add( + patterns.getContext(), benefit); +} + +//===----------------------------------------------------------------------===// +// populateVectorMaskLoweringPatternsForSideEffectingOps +//===----------------------------------------------------------------------===// + namespace { /// The `MaskOpRewritePattern` implements a pattern that follows a two-fold diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp similarity index 98% rename from mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp rename to mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp index b790d141415aa..1744c46db5886 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp @@ -1,4 +1,4 @@ -//===- VectorMultiDimReductionTransforms.cpp - Multi-Reduction Transforms -===// +//===- LowerVectorMultiReduction.cpp - Lower `vector.multi_reduction` op --===// // /// Part of the LLVM Project, under the Apache License v2.0 with LLVM /// Exceptions. See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,13 @@ // //===----------------------------------------------------------------------===// // -/// This file implements target-independent rewrites of MultiDimReductionOp. 
+// This file implements target-independent rewrites and utilities to lower the
+// 'vector.multi_reduction' operation.
 //
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Arith/IR/Arith.h"
-#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/TypeUtilities.h"
@@ -19,6 +20,7 @@ using namespace mlir;
 
+namespace {
 /// This file implements the following transformations as composable atomic
 /// patterns.
@@ -441,6 +443,7 @@ struct OneDimMultiReductionToTwoDim
     return success();
   }
 };
+} // namespace
 
 void mlir::vector::populateVectorMultiReductionLoweringPatterns(
     RewritePatternSet &patterns, VectorMultiReductionLowering options,
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp
new file mode 100644
index 0000000000000..eb2deba7bc46b
--- /dev/null
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp
@@ -0,0 +1,251 @@
+//===- LowerVectorScan.cpp - Lower 'vector.scan' operation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target-independent rewrites and utilities to lower the
+// 'vector.scan' operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/VectorInterfaces.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "vector-broadcast-lowering"
+
+using namespace mlir;
+using namespace mlir::vector;
+
+/// This function constructs the appropriate integer or float
+/// operation given the vector combining kind and operands. The
+/// supported int operations are : add, mul, min (signed/unsigned),
+/// max(signed/unsigned), and, or, xor. The supported float
+/// operations are : add, mul, min and max.
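Before the helpers and the pattern below, a standalone C++ model of what this lowering computes for a rank-2 scan along dimension 1 (plain row-major arrays; all names are illustrative). It mirrors the per-column loop of `ScanToArithOps` further down: column `j` holds the running combine, and the second result is the final running value of each row.

```cpp
#include <cstdint>
#include <functional>
#include <vector>

struct ScanResult {
  std::vector<float> dest;        // rows x cols, the scanned values
  std::vector<float> accumulated; // rows, the op's second result
};

// Model of vector.scan along dim 1: for an inclusive scan column 0 is the
// first input column; for an exclusive scan it is the initial value and each
// step combines with the previous input column instead of the current one.
ScanResult scanDim1(const std::vector<float> &src,
                    const std::vector<float> &init, int64_t rows, int64_t cols,
                    bool inclusive,
                    const std::function<float(float, float)> &combine) {
  ScanResult r{std::vector<float>(rows * cols), std::vector<float>(rows)};
  for (int64_t i = 0; i < rows; ++i) {
    float out = 0.0f;
    for (int64_t j = 0; j < cols; ++j) {
      if (j == 0)
        out = inclusive ? src[i * cols] : init[i];
      else
        out = combine(out, src[i * cols + (inclusive ? j : j - 1)]);
      r.dest[i * cols + j] = out;
    }
    r.accumulated[i] = out;
  }
  return r;
}
```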
+static Value genOperator(Location loc, Value x, Value y, + vector::CombiningKind kind, + PatternRewriter &rewriter) { + using vector::CombiningKind; + + auto elType = x.getType().cast().getElementType(); + bool isInt = elType.isIntOrIndex(); + + Value combinedResult{nullptr}; + switch (kind) { + case CombiningKind::ADD: + if (isInt) + combinedResult = rewriter.create(loc, x, y); + else + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MUL: + if (isInt) + combinedResult = rewriter.create(loc, x, y); + else + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MINUI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MINSI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MAXUI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MAXSI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::AND: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::OR: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::XOR: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MINF: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MAXF: + combinedResult = rewriter.create(loc, x, y); + break; + } + return combinedResult; +} + +/// This function checks to see if the vector combining kind +/// is consistent with the integer or float element type. +static bool isValidKind(bool isInt, vector::CombiningKind kind) { + using vector::CombiningKind; + enum class KindType { FLOAT, INT, INVALID }; + KindType type{KindType::INVALID}; + switch (kind) { + case CombiningKind::MINF: + case CombiningKind::MAXF: + type = KindType::FLOAT; + break; + case CombiningKind::MINUI: + case CombiningKind::MINSI: + case CombiningKind::MAXUI: + case CombiningKind::MAXSI: + case CombiningKind::AND: + case CombiningKind::OR: + case CombiningKind::XOR: + type = KindType::INT; + break; + case CombiningKind::ADD: + case CombiningKind::MUL: + type = isInt ? KindType::INT : KindType::FLOAT; + break; + } + bool isValidIntKind = (type == KindType::INT) && isInt; + bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt); + return (isValidIntKind || isValidFloatKind); +} + +namespace { +/// Convert vector.scan op into arith ops and vector.insert_strided_slice / +/// vector.extract_strided_slice. 
+/// +/// Example: +/// +/// ``` +/// %0:2 = vector.scan , %arg0, %arg1 +/// {inclusive = true, reduction_dim = 1} : +/// (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>) +/// ``` +/// +/// is converted to: +/// +/// ``` +/// %cst = arith.constant dense<0> : vector<2x3xi32> +/// %0 = vector.extract_strided_slice %arg0 +/// {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]} +/// : vector<2x3xi32> to vector<2x1xi32> +/// %1 = vector.insert_strided_slice %0, %cst +/// {offsets = [0, 0], strides = [1, 1]} +/// : vector<2x1xi32> into vector<2x3xi32> +/// %2 = vector.extract_strided_slice %arg0 +/// {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]} +/// : vector<2x3xi32> to vector<2x1xi32> +/// %3 = arith.muli %0, %2 : vector<2x1xi32> +/// %4 = vector.insert_strided_slice %3, %1 +/// {offsets = [0, 1], strides = [1, 1]} +/// : vector<2x1xi32> into vector<2x3xi32> +/// %5 = vector.extract_strided_slice %arg0 +/// {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]} +/// : vector<2x3xi32> to vector<2x1xi32> +/// %6 = arith.muli %3, %5 : vector<2x1xi32> +/// %7 = vector.insert_strided_slice %6, %4 +/// {offsets = [0, 2], strides = [1, 1]} +/// : vector<2x1xi32> into vector<2x3xi32> +/// %8 = vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32> +/// return %7, %8 : vector<2x3xi32>, vector<2xi32> +/// ``` +struct ScanToArithOps : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ScanOp scanOp, + PatternRewriter &rewriter) const override { + auto loc = scanOp.getLoc(); + VectorType destType = scanOp.getDestType(); + ArrayRef destShape = destType.getShape(); + auto elType = destType.getElementType(); + bool isInt = elType.isIntOrIndex(); + if (!isValidKind(isInt, scanOp.getKind())) + return failure(); + + VectorType resType = VectorType::get(destShape, elType); + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + int64_t reductionDim = scanOp.getReductionDim(); + bool inclusive = scanOp.getInclusive(); + int64_t destRank = destType.getRank(); + VectorType initialValueType = scanOp.getInitialValueType(); + int64_t initialValueRank = initialValueType.getRank(); + + SmallVector reductionShape(destShape.begin(), destShape.end()); + reductionShape[reductionDim] = 1; + VectorType reductionType = VectorType::get(reductionShape, elType); + SmallVector offsets(destRank, 0); + SmallVector strides(destRank, 1); + SmallVector sizes(destShape.begin(), destShape.end()); + sizes[reductionDim] = 1; + ArrayAttr scanSizes = rewriter.getI64ArrayAttr(sizes); + ArrayAttr scanStrides = rewriter.getI64ArrayAttr(strides); + + Value lastOutput, lastInput; + for (int i = 0; i < destShape[reductionDim]; i++) { + offsets[reductionDim] = i; + ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(offsets); + Value input = rewriter.create( + loc, reductionType, scanOp.getSource(), scanOffsets, scanSizes, + scanStrides); + Value output; + if (i == 0) { + if (inclusive) { + output = input; + } else { + if (initialValueRank == 0) { + // ShapeCastOp cannot handle 0-D vectors + output = rewriter.create( + loc, input.getType(), scanOp.getInitialValue()); + } else { + output = rewriter.create( + loc, input.getType(), scanOp.getInitialValue()); + } + } + } else { + Value y = inclusive ? 
input : lastInput; + output = genOperator(loc, lastOutput, y, scanOp.getKind(), rewriter); + assert(output != nullptr); + } + result = rewriter.create( + loc, output, result, offsets, strides); + lastOutput = output; + lastInput = input; + } + + Value reduction; + if (initialValueRank == 0) { + Value v = rewriter.create(loc, lastOutput, 0); + reduction = + rewriter.create(loc, initialValueType, v); + } else { + reduction = rewriter.create(loc, initialValueType, + lastOutput); + } + + rewriter.replaceOp(scanOp, {result, reduction}); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorScanLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add(patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp new file mode 100644 index 0000000000000..bd9716cbca94c --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp @@ -0,0 +1,177 @@ +//===- LowerVectorShapeCast.cpp - Lower 'vector.shape_cast' operation -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.shape_cast' operation. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" +#include "mlir/Dialect/Vector/Utils/VectorUtils.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/VectorInterfaces.h" +#include "mlir/Support/LogicalResult.h" + +#define DEBUG_TYPE "vector-shape-cast-lowering" + +using namespace mlir; +using namespace mlir::vector; + +namespace { +/// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D +/// vectors progressively on the way to target llvm.matrix intrinsics. 
+/// This iterates over the most major dimension of the 2-D vector and performs +/// rewrites into: +/// vector.extract from 2-D + vector.insert_strided_slice offset into 1-D +class ShapeCastOp2DDownCastRewritePattern + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ShapeCastOp op, + PatternRewriter &rewriter) const override { + auto sourceVectorType = op.getSourceVectorType(); + auto resultVectorType = op.getResultVectorType(); + if (sourceVectorType.getRank() != 2 || resultVectorType.getRank() != 1) + return failure(); + + auto loc = op.getLoc(); + Value desc = rewriter.create( + loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); + unsigned mostMinorVectorSize = sourceVectorType.getShape()[1]; + for (int64_t i = 0, e = sourceVectorType.getShape().front(); i != e; ++i) { + Value vec = rewriter.create(loc, op.getSource(), i); + desc = rewriter.create( + loc, vec, desc, + /*offsets=*/i * mostMinorVectorSize, /*strides=*/1); + } + rewriter.replaceOp(op, desc); + return success(); + } +}; + +/// ShapeOp 1D -> 2D upcast serves the purpose of unflattening 2-D from 1-D +/// vectors progressively. +/// This iterates over the most major dimension of the 2-D vector and performs +/// rewrites into: +/// vector.extract_strided_slice from 1-D + vector.insert into 2-D +/// Note that 1-D extract_strided_slice are lowered to efficient vector.shuffle. +class ShapeCastOp2DUpCastRewritePattern + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ShapeCastOp op, + PatternRewriter &rewriter) const override { + auto sourceVectorType = op.getSourceVectorType(); + auto resultVectorType = op.getResultVectorType(); + if (sourceVectorType.getRank() != 1 || resultVectorType.getRank() != 2) + return failure(); + + auto loc = op.getLoc(); + Value desc = rewriter.create( + loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); + unsigned mostMinorVectorSize = resultVectorType.getShape()[1]; + for (int64_t i = 0, e = resultVectorType.getShape().front(); i != e; ++i) { + Value vec = rewriter.create( + loc, op.getSource(), /*offsets=*/i * mostMinorVectorSize, + /*sizes=*/mostMinorVectorSize, + /*strides=*/1); + desc = rewriter.create(loc, vec, desc, i); + } + rewriter.replaceOp(op, desc); + return success(); + } +}; + +// We typically should not lower general shape cast operations into data +// movement instructions, since the assumption is that these casts are +// optimized away during progressive lowering. For completeness, however, +// we fall back to a reference implementation that moves all elements +// into the right place if we get here. +class ShapeCastOpRewritePattern : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ShapeCastOp op, + PatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + auto sourceVectorType = op.getSourceVectorType(); + auto resultVectorType = op.getResultVectorType(); + + // Special case 2D / 1D lowerings with better implementations. + // TODO: make is ND / 1D to allow generic ND -> 1D -> MD. + int64_t srcRank = sourceVectorType.getRank(); + int64_t resRank = resultVectorType.getRank(); + if ((srcRank == 2 && resRank == 1) || (srcRank == 1 && resRank == 2)) + return failure(); + + // Generic ShapeCast lowering path goes all the way down to unrolled scalar + // extract/insert chains. 
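As an aside before the TODO, here is a standalone model of the row-major multi-index stepping that the `incIdx` helper at the bottom of this pattern implements (plain C++, illustrative): the rightmost dimension varies fastest, and a wrap carries into the next more-major dimension.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Advance `idx` to the next position within `shape` in row-major order.
void incIdx(std::vector<int64_t> &idx, const std::vector<int64_t> &shape) {
  for (int64_t r = static_cast<int64_t>(shape.size()) - 1; r >= 0; --r) {
    if (++idx[r] < shape[r])
      return;
    idx[r] = 0; // wrapped this dimension, carry into the next one
  }
  assert(false && "stepped past the last element of the shape");
}
```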
+ // TODO: consider evolving the semantics to only allow 1D source or dest and + // drop this potentially very expensive lowering. + // Compute number of elements involved in the reshape. + int64_t numElts = 1; + for (int64_t r = 0; r < srcRank; r++) + numElts *= sourceVectorType.getDimSize(r); + // Replace with data movement operations: + // x[0,0,0] = y[0,0] + // x[0,0,1] = y[0,1] + // x[0,1,0] = y[0,2] + // etc., incrementing the two index vectors "row-major" + // within the source and result shape. + SmallVector srcIdx(srcRank); + SmallVector resIdx(resRank); + Value result = rewriter.create( + loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); + for (int64_t i = 0; i < numElts; i++) { + if (i != 0) { + incIdx(srcIdx, sourceVectorType, srcRank - 1); + incIdx(resIdx, resultVectorType, resRank - 1); + } + Value e = rewriter.create(loc, op.getSource(), srcIdx); + result = rewriter.create(loc, e, result, resIdx); + } + rewriter.replaceOp(op, result); + return success(); + } + +private: + static void incIdx(SmallVector &idx, VectorType tp, int64_t r) { + assert(0 <= r && r < tp.getRank()); + if (++idx[r] == tp.getDimSize(r)) { + idx[r] = 0; + incIdx(idx, tp, r - 1); + } + } +}; +} // namespace + +void mlir::vector::populateVectorShapeCastLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add( + patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp similarity index 57% rename from mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp rename to mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp index 68d9a349478bf..c2ce9aa10a850 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp @@ -14,7 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Interfaces/VectorInterfaces.h" using namespace mlir; @@ -46,6 +46,11 @@ static Value extendVectorRank(OpBuilder &builder, Location loc, Value vec, return builder.create(loc, newVecType, vec); } +//===----------------------------------------------------------------------===// +// populateVectorTransferPermutationMapLoweringPatterns +//===----------------------------------------------------------------------===// + +namespace { /// Lower transfer_read op with permutation into a transfer_read with a /// permutation map composed of leading zeros followed by a minor identiy + /// vector.transpose op. @@ -332,6 +337,8 @@ struct TransferOpReduceRank : public OpRewritePattern { } }; +} // namespace + void mlir::vector::populateVectorTransferPermutationMapLoweringPatterns( RewritePatternSet &patterns, PatternBenefit benefit) { patterns @@ -339,3 +346,239 @@ void mlir::vector::populateVectorTransferPermutationMapLoweringPatterns( TransferOpReduceRank, TransferWriteNonPermutationLowering>( patterns.getContext(), benefit); } + +//===----------------------------------------------------------------------===// +// populateVectorTransferLoweringPatterns +//===----------------------------------------------------------------------===// + +namespace { +/// Progressive lowering of transfer_read. 
This pattern supports lowering of +/// `vector.transfer_read` to a combination of `vector.load` and +/// `vector.broadcast` if all of the following hold: +/// - Stride of most minor memref dimension must be 1. +/// - Out-of-bounds masking is not required. +/// - If the memref's element type is a vector type then it coincides with the +/// result type. +/// - The permutation map doesn't perform permutation (broadcasting is allowed). +struct TransferReadToVectorLoadLowering + : public OpRewritePattern { + TransferReadToVectorLoadLowering(MLIRContext *context, + std::optional maxRank, + PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + maxTransferRank(maxRank) {} + + LogicalResult matchAndRewrite(vector::TransferReadOp read, + PatternRewriter &rewriter) const override { + if (maxTransferRank && read.getVectorType().getRank() > *maxTransferRank) + return failure(); + + SmallVector broadcastedDims; + // Permutations are handled by VectorToSCF or + // populateVectorTransferPermutationMapLoweringPatterns. + // We let the 0-d corner case pass-through as it is supported. + if (!read.getPermutationMap().isMinorIdentityWithBroadcasting( + &broadcastedDims)) + return failure(); + + auto memRefType = read.getShapedType().dyn_cast(); + if (!memRefType) + return failure(); + + // Non-unit strides are handled by VectorToSCF. + if (!vector::isLastMemrefDimUnitStride(memRefType)) + return failure(); + + // If there is broadcasting involved then we first load the unbroadcasted + // vector, and then broadcast it with `vector.broadcast`. + ArrayRef vectorShape = read.getVectorType().getShape(); + SmallVector unbroadcastedVectorShape(vectorShape.begin(), + vectorShape.end()); + for (unsigned i : broadcastedDims) + unbroadcastedVectorShape[i] = 1; + VectorType unbroadcastedVectorType = VectorType::get( + unbroadcastedVectorShape, read.getVectorType().getElementType()); + + // `vector.load` supports vector types as memref's elements only when the + // resulting vector type is the same as the element type. + auto memrefElTy = memRefType.getElementType(); + if (memrefElTy.isa() && memrefElTy != unbroadcastedVectorType) + return failure(); + + // Otherwise, element types of the memref and the vector must match. + if (!memrefElTy.isa() && + memrefElTy != read.getVectorType().getElementType()) + return failure(); + + // Out-of-bounds dims are handled by MaterializeTransferMask. + if (read.hasOutOfBoundsDim()) + return failure(); + + // Create vector load op. + Operation *loadOp; + if (read.getMask()) { + Value fill = rewriter.create( + read.getLoc(), unbroadcastedVectorType, read.getPadding()); + loadOp = rewriter.create( + read.getLoc(), unbroadcastedVectorType, read.getSource(), + read.getIndices(), read.getMask(), fill); + } else { + loadOp = rewriter.create( + read.getLoc(), unbroadcastedVectorType, read.getSource(), + read.getIndices()); + } + + // Insert a broadcasting op if required. + if (!broadcastedDims.empty()) { + rewriter.replaceOpWithNewOp( + read, read.getVectorType(), loadOp->getResult(0)); + } else { + rewriter.replaceOp(read, loadOp->getResult(0)); + } + + return success(); + } + + std::optional maxTransferRank; +}; + +/// Replace a 0-d vector.load with a memref.load + vector.broadcast. +// TODO: we shouldn't cross the vector/scalar domains just for this +// but atm we lack the infra to avoid it. 
Possible solutions include: +// - go directly to LLVM + bitcast +// - introduce a bitcast op and likely a new pointer dialect +// - let memref.load/store additionally support the 0-d vector case +// There are still deeper data layout issues lingering even in this +// trivial case (for architectures for which this matters). +struct VectorLoadToMemrefLoadLowering + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::LoadOp loadOp, + PatternRewriter &rewriter) const override { + auto vecType = loadOp.getVectorType(); + if (vecType.getNumElements() != 1) + return failure(); + auto memrefLoad = rewriter.create( + loadOp.getLoc(), loadOp.getBase(), loadOp.getIndices()); + rewriter.replaceOpWithNewOp(loadOp, vecType, + memrefLoad); + return success(); + } +}; + +/// Replace a 0-d vector.store with a vector.extractelement + memref.store. +struct VectorStoreToMemrefStoreLowering + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::StoreOp storeOp, + PatternRewriter &rewriter) const override { + auto vecType = storeOp.getVectorType(); + if (vecType.getNumElements() != 1) + return failure(); + Value extracted; + if (vecType.getRank() == 0) { + // TODO: Unifiy once ExtractOp supports 0-d vectors. + extracted = rewriter.create( + storeOp.getLoc(), storeOp.getValueToStore()); + } else { + SmallVector indices(vecType.getRank(), 0); + extracted = rewriter.create( + storeOp.getLoc(), storeOp.getValueToStore(), indices); + } + + rewriter.replaceOpWithNewOp( + storeOp, extracted, storeOp.getBase(), storeOp.getIndices()); + return success(); + } +}; + +/// Progressive lowering of transfer_write. This pattern supports lowering of +/// `vector.transfer_write` to `vector.store` if all of the following hold: +/// - Stride of most minor memref dimension must be 1. +/// - Out-of-bounds masking is not required. +/// - If the memref's element type is a vector type then it coincides with the +/// type of the written value. +/// - The permutation map is the minor identity map (neither permutation nor +/// broadcasting is allowed). +struct TransferWriteToVectorStoreLowering + : public OpRewritePattern { + TransferWriteToVectorStoreLowering(MLIRContext *context, + std::optional maxRank, + PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + maxTransferRank(maxRank) {} + + LogicalResult matchAndRewrite(vector::TransferWriteOp write, + PatternRewriter &rewriter) const override { + if (maxTransferRank && write.getVectorType().getRank() > *maxTransferRank) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "rank exceeds maxTransferRank: " << write; + }); + + // Permutations are handled by VectorToSCF or + // populateVectorTransferPermutationMapLoweringPatterns. + if ( // pass-through for the 0-d corner case. + !write.getPermutationMap().isMinorIdentity()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "permutation map is not minor identity: " << write; + }); + + auto memRefType = write.getShapedType().dyn_cast(); + if (!memRefType) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "not a memref type: " << write; + }); + + // Non-unit strides are handled by VectorToSCF. 
+ if (!vector::isLastMemrefDimUnitStride(memRefType)) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "most minor stride is not 1: " << write; + }); + + // `vector.store` supports vector types as memref's elements only when the + // type of the vector value being written is the same as the element type. + auto memrefElTy = memRefType.getElementType(); + if (memrefElTy.isa() && memrefElTy != write.getVectorType()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "elemental type mismatch: " << write; + }); + + // Otherwise, element types of the memref and the vector must match. + if (!memrefElTy.isa() && + memrefElTy != write.getVectorType().getElementType()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "elemental type mismatch: " << write; + }); + + // Out-of-bounds dims are handled by MaterializeTransferMask. + if (write.hasOutOfBoundsDim()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "out of bounds dim: " << write; + }); + if (write.getMask()) { + rewriter.replaceOpWithNewOp( + write, write.getSource(), write.getIndices(), write.getMask(), + write.getVector()); + } else { + rewriter.replaceOpWithNewOp( + write, write.getVector(), write.getSource(), write.getIndices()); + } + return success(); + } + + std::optional maxTransferRank; +}; +} // namespace + +void mlir::vector::populateVectorTransferLoweringPatterns( + RewritePatternSet &patterns, std::optional maxTransferRank, + PatternBenefit benefit) { + patterns.add(patterns.getContext(), + maxTransferRank, benefit); + patterns + .add( + patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp new file mode 100644 index 0000000000000..f6e8b0c445c99 --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp @@ -0,0 +1,210 @@ +//===- LowerVectorTranspose.cpp - Lower 'vector.transpose' operation ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.transpose' operation. 
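Before turning to the transpose lowering file, a sketch of how the transfer lowering entry points above compose. This is illustrative only: `ctx` and `funcOp` are assumed to exist in some enclosing pass, and the rank limit of 1 is just an example value; the two populate functions and the greedy driver are the pieces actually introduced or used by this patch.

```cpp
// Hypothetical pass body (illustrative). Registering the permutation-map
// patterns alongside the load/store conversion lets transfers with permuted
// maps be rewritten into a form the conversion patterns accept.
RewritePatternSet patterns(ctx);
vector::populateVectorTransferPermutationMapLoweringPatterns(patterns);
vector::populateVectorTransferLoweringPatterns(patterns,
                                               /*maxTransferRank=*/1);
if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns))))
  signalPassFailure();
```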
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Utils/VectorUtils.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/VectorInterfaces.h" +#include "mlir/Support/LogicalResult.h" + +#define DEBUG_TYPE "vector-shape-cast-lowering" + +using namespace mlir; +using namespace mlir::vector; + +/// Given a 'transpose' pattern, prune the rightmost dimensions that are not +/// transposed. +static void pruneNonTransposedDims(ArrayRef transpose, + SmallVectorImpl &result) { + size_t numTransposedDims = transpose.size(); + for (size_t transpDim : llvm::reverse(transpose)) { + if (transpDim != numTransposedDims - 1) + break; + numTransposedDims--; + } + + result.append(transpose.begin(), transpose.begin() + numTransposedDims); +} + +namespace { +/// Progressive lowering of TransposeOp. +/// One: +/// %x = vector.transpose %y, [1, 0] +/// is replaced by: +/// %z = arith.constant dense<0.000000e+00> +/// %0 = vector.extract %y[0, 0] +/// %1 = vector.insert %0, %z [0, 0] +/// .. +/// %x = vector.insert .., .. [.., ..] +class TransposeOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + TransposeOpLowering(vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions) {} + + LogicalResult matchAndRewrite(vector::TransposeOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + + Value input = op.getVector(); + VectorType inputType = op.getSourceVectorType(); + VectorType resType = op.getResultVectorType(); + + // Set up convenience transposition table. + SmallVector transp; + for (auto attr : op.getTransp()) + transp.push_back(attr.cast().getInt()); + + if (vectorTransformOptions.vectorTransposeLowering == + vector::VectorTransposeLowering::Shuffle && + resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) + return rewriter.notifyMatchFailure( + op, "Options specifies lowering to shuffle"); + + // Handle a true 2-D matrix transpose differently when requested. + if (vectorTransformOptions.vectorTransposeLowering == + vector::VectorTransposeLowering::Flat && + resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) { + Type flattenedType = + VectorType::get(resType.getNumElements(), resType.getElementType()); + auto matrix = + rewriter.create(loc, flattenedType, input); + auto rows = rewriter.getI32IntegerAttr(resType.getShape()[0]); + auto columns = rewriter.getI32IntegerAttr(resType.getShape()[1]); + Value trans = rewriter.create( + loc, flattenedType, matrix, rows, columns); + rewriter.replaceOpWithNewOp(op, resType, trans); + return success(); + } + + // Generate unrolled extract/insert ops. 
We do not unroll the rightmost + // (i.e., highest-order) dimensions that are not transposed and leave them + // in vector form to improve performance. Therefore, we prune those + // dimensions from the shape/transpose data structures used to generate the + // extract/insert ops. + SmallVector prunedTransp; + pruneNonTransposedDims(transp, prunedTransp); + size_t numPrunedDims = transp.size() - prunedTransp.size(); + auto prunedInShape = inputType.getShape().drop_back(numPrunedDims); + auto prunedInStrides = computeStrides(prunedInShape); + + // Generates the extract/insert operations for every scalar/vector element + // of the leftmost transposed dimensions. We traverse every transpose + // element using a linearized index that we delinearize to generate the + // appropriate indices for the extract/insert operations. + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + int64_t numTransposedElements = ShapedType::getNumElements(prunedInShape); + + for (int64_t linearIdx = 0; linearIdx < numTransposedElements; + ++linearIdx) { + auto extractIdxs = delinearize(linearIdx, prunedInStrides); + SmallVector insertIdxs(extractIdxs); + applyPermutationToVector(insertIdxs, prunedTransp); + Value extractOp = + rewriter.create(loc, input, extractIdxs); + result = + rewriter.create(loc, extractOp, result, insertIdxs); + } + + rewriter.replaceOp(op, result); + return success(); + } + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; +}; + +/// Rewrite a 2-D vector.transpose as a sequence of: +/// vector.shape_cast 2D -> 1D +/// vector.shuffle +/// vector.shape_cast 1D -> 2D +class TransposeOp2DToShuffleLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + TransposeOp2DToShuffleLowering( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions) {} + + LogicalResult matchAndRewrite(vector::TransposeOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + + VectorType srcType = op.getSourceVectorType(); + if (srcType.getRank() != 2) + return rewriter.notifyMatchFailure(op, "Not a 2D transpose"); + + SmallVector transp; + for (auto attr : op.getTransp()) + transp.push_back(attr.cast().getInt()); + if (transp[0] != 1 && transp[1] != 0) + return rewriter.notifyMatchFailure(op, "Not a 2D transpose permutation"); + + if (vectorTransformOptions.vectorTransposeLowering != + VectorTransposeLowering::Shuffle) + return rewriter.notifyMatchFailure(op, "Options do not ask for Shuffle"); + + int64_t m = srcType.getShape().front(), n = srcType.getShape().back(); + Value casted = rewriter.create( + loc, VectorType::get({m * n}, srcType.getElementType()), + op.getVector()); + SmallVector mask; + mask.reserve(m * n); + for (int64_t j = 0; j < n; ++j) + for (int64_t i = 0; i < m; ++i) + mask.push_back(i * n + j); + + Value shuffled = + rewriter.create(loc, casted, casted, mask); + rewriter.replaceOpWithNewOp( + op, op.getResultVectorType(), shuffled); + + return success(); + } + +private: + /// Options to control the vector patterns. 
+ vector::VectorTransformsOptions vectorTransformOptions; +}; +} // namespace + +void mlir::vector::populateVectorTransposeLoweringPatterns( + RewritePatternSet &patterns, VectorTransformsOptions options, + PatternBenefit benefit) { + patterns.add( + options, patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp index 38062b9893f1a..b0690f63422d9 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/Dialect/Vector/Utils/VectorUtils.h" #include "mlir/IR/BuiltinOps.h" diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp index ee23b5494f707..caf5822256bc6 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -92,11 +92,11 @@ static Value createInBoundsCond(RewriterBase &b, } /// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds -/// masking) fastpath and a slowpath. +/// masking) fast path and a slow path. /// If `ifOp` is not null and the result is `success, the `ifOp` points to the /// newly created conditional upon function return. -/// To accomodate for the fact that the original vector.transfer indexing may be -/// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the +/// To accommodate for the fact that the original vector.transfer indexing may +/// be arbitrary and the slow path indexes @[0...0] in the temporary buffer, the /// scf.if op returns a view and values of type index. /// At this time, only vector.transfer_read case is implemented. /// @@ -107,11 +107,11 @@ static Value createInBoundsCond(RewriterBase &b, /// is transformed into: /// ``` /// %1:3 = scf.if (%inBounds) { -/// // fastpath, direct cast +/// // fast path, direct cast /// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { -/// // slowpath, not in-bounds vector.transfer or linalg.copy. +/// // slow path, not in-bounds vector.transfer or linalg.copy. /// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } @@ -172,12 +172,10 @@ static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { for (int64_t idx = 0, e = aT.getRank(); idx < e; ++idx) { resShape[idx] = (aShape[idx] == bShape[idx]) ? aShape[idx] : ShapedType::kDynamic; - resStrides[idx] = (aStrides[idx] == bStrides[idx]) - ? aStrides[idx] - : ShapedType::kDynamic; + resStrides[idx] = + (aStrides[idx] == bStrides[idx]) ? aStrides[idx] : ShapedType::kDynamic; } - resOffset = - (aOffset == bOffset) ? aOffset : ShapedType::kDynamic; + resOffset = (aOffset == bOffset) ? 
aOffset : ShapedType::kDynamic; return MemRefType::get( resShape, aT.getElementType(), StridedLayoutAttr::get(aT.getContext(), resOffset, resStrides)); @@ -634,7 +632,34 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer( return success(); } -LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( +namespace { +/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern +/// may take an extra filter to perform selection at a finer granularity. +struct VectorTransferFullPartialRewriter : public RewritePattern { + using FilterConstraintType = + std::function; + + explicit VectorTransferFullPartialRewriter( + MLIRContext *context, + VectorTransformsOptions options = VectorTransformsOptions(), + FilterConstraintType filter = + [](VectorTransferOpInterface op) { return success(); }, + PatternBenefit benefit = 1) + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), options(options), + filter(std::move(filter)) {} + + /// Performs the rewrite. + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + VectorTransformsOptions options; + FilterConstraintType filter; +}; + +} // namespace + +LogicalResult VectorTransferFullPartialRewriter::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { auto xferOp = dyn_cast(op); if (!xferOp || failed(splitFullAndPartialTransferPrecondition(xferOp)) || @@ -642,3 +667,9 @@ LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( return failure(); return splitFullAndPartialTransfer(rewriter, xferOp, options); } + +void mlir::vector::populateVectorTransferFullPartialPatterns( + RewritePatternSet &patterns, const VectorTransformsOptions &options) { + patterns.add(patterns.getContext(), + options); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index fe59143ebd55f..20fc59e874ab6 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -51,102 +51,6 @@ using namespace mlir; using namespace mlir::vector; -// Helper to find an index in an affine map. -static std::optional getResultIndex(AffineMap map, int64_t index) { - for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { - int64_t idx = map.getDimPosition(i); - if (idx == index) - return i; - } - return std::nullopt; -} - -// Helper to construct iterator types with one index removed. -static SmallVector adjustIter(ArrayAttr iteratorTypes, - int64_t index) { - SmallVector results; - for (const auto &it : llvm::enumerate(iteratorTypes)) { - int64_t idx = it.index(); - if (idx == index) - continue; - results.push_back(it.value()); - } - return results; -} - -// Helper to construct an affine map with one index removed. -static AffineMap adjustMap(AffineMap map, int64_t index, - PatternRewriter &rewriter) { - auto *ctx = rewriter.getContext(); - SmallVector results; - for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { - int64_t idx = map.getDimPosition(i); - if (idx == index) - continue; - // Re-insert remaining indices, but renamed when occurring - // after the removed index. - auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx); - results.push_back(targetExpr); - } - return AffineMap::get(map.getNumDims() - 1, 0, results, ctx); -} - -// Helper method to possibly drop a dimension in a load. 
-// TODO -static Value reshapeLoad(Location loc, Value val, VectorType type, - int64_t index, int64_t pos, - PatternRewriter &rewriter) { - if (index == -1) - return val; - Type lowType = VectorType::Builder(type).dropDim(0); - // At extraction dimension? - if (index == 0) { - auto posAttr = rewriter.getI64ArrayAttr(pos); - return rewriter.create(loc, lowType, val, posAttr); - } - // Unroll leading dimensions. - VectorType vType = lowType.cast(); - Type resType = VectorType::Builder(type).dropDim(index); - auto resVectorType = resType.cast(); - Value result = rewriter.create( - loc, resVectorType, rewriter.getZeroAttr(resVectorType)); - for (int64_t d = 0, e = resVectorType.getDimSize(0); d < e; d++) { - auto posAttr = rewriter.getI64ArrayAttr(d); - Value ext = rewriter.create(loc, vType, val, posAttr); - Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter); - result = rewriter.create(loc, resVectorType, load, result, - posAttr); - } - return result; -} - -// Helper method to possibly drop a dimension in a store. -// TODO -static Value reshapeStore(Location loc, Value val, Value result, - VectorType type, int64_t index, int64_t pos, - PatternRewriter &rewriter) { - // Unmodified? - if (index == -1) - return val; - // At insertion dimension? - if (index == 0) { - auto posAttr = rewriter.getI64ArrayAttr(pos); - return rewriter.create(loc, type, val, result, posAttr); - } - // Unroll leading dimensions. - Type lowType = VectorType::Builder(type).dropDim(0); - VectorType vType = lowType.cast(); - Type insType = VectorType::Builder(vType).dropDim(0); - for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) { - auto posAttr = rewriter.getI64ArrayAttr(d); - Value ext = rewriter.create(loc, vType, result, posAttr); - Value ins = rewriter.create(loc, insType, val, posAttr); - Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter); - result = rewriter.create(loc, type, sto, result, posAttr); - } - return result; -} - template static SmallVector extractVector(ArrayAttr arrayAttr) { return llvm::to_vector<4>(llvm::map_range( @@ -154,61 +58,11 @@ static SmallVector extractVector(ArrayAttr arrayAttr) { [](IntegerAttr attr) { return static_cast(attr.getInt()); })); } -/// Helper to create arithmetic operation associated with a kind of contraction. -static std::optional -createContractArithOp(Location loc, Value x, Value y, Value acc, - vector::CombiningKind kind, PatternRewriter &rewriter, - bool isInt, Value mask = Value()) { - using vector::CombiningKind; - Value mul; - - if (isInt) { - if (kind == CombiningKind::MINF || kind == CombiningKind::MAXF) - // Only valid for floating point types. - return std::nullopt; - mul = rewriter.create(loc, x, y); - } else { - // Float case. - if (kind == CombiningKind::AND || kind == CombiningKind::MINUI || - kind == CombiningKind::MINSI || kind == CombiningKind::MAXUI || - kind == CombiningKind::MAXSI || kind == CombiningKind::OR || - kind == CombiningKind::XOR) - // Only valid for integer types. - return std::nullopt; - // Special case for fused multiply-add. - if (acc && acc.getType().isa() && kind == CombiningKind::ADD) { - Value fma = rewriter.create(loc, x, y, acc); - if (mask) - // The fma op doesn't need explicit masking. However, fma ops used in - // reductions must preserve previous 'acc' values for masked-out lanes. 
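In other words, when the surrounding reduction is masked, the fused multiply-add is kept as-is and its result is filtered back against the accumulator; a sketch with illustrative types:

```
%f = vector.fma %x, %y, %acc : vector<8xf32>
%r = arith.select %mask, %f, %acc : vector<8xi1>, vector<8xf32>
```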
- fma = selectPassthru(rewriter, mask, fma, acc); - return fma; - } - mul = rewriter.create(loc, x, y); - } - - if (!acc) - return std::optional(mul); - - return makeArithReduction(rewriter, loc, kind, mul, acc, mask); -} - -/// Return the positions of the reductions in the given map. -static SmallVector getReductionIndex(AffineMap map, - ArrayAttr iteratorTypes) { - SmallVector dimsIdx; - for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { - if (isReductionIterator(iteratorTypes[map.getDimPosition(i)])) - dimsIdx.push_back(i); - } - return dimsIdx; -} - -/// Look for a given dimension in an affine map and return its position. Return -/// std::nullopt if the dimension is not in the map results. -static std::optional getDimPosition(AffineMap map, unsigned dim) { - for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { - if (map.getDimPosition(i) == dim) +// Helper to find an index in an affine map. +static std::optional getResultIndex(AffineMap map, int64_t index) { + for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { + int64_t idx = map.getDimPosition(i); + if (idx == index) return i; } return std::nullopt; @@ -264,735 +118,6 @@ struct ShapeCastOpFolder : public OpRewritePattern { } }; -/// Progressive lowering of BroadcastOp. -class BroadcastOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::BroadcastOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - VectorType dstType = op.getResultVectorType(); - VectorType srcType = op.getSourceType().dyn_cast(); - Type eltType = dstType.getElementType(); - - // Scalar to any vector can use splat. - if (!srcType) { - rewriter.replaceOpWithNewOp(op, dstType, op.getSource()); - return success(); - } - - // Determine rank of source and destination. - int64_t srcRank = srcType.getRank(); - int64_t dstRank = dstType.getRank(); - - // Stretching scalar inside vector (e.g. vector<1xf32>) can use splat. - if (srcRank <= 1 && dstRank == 1) { - Value ext; - if (srcRank == 0) - ext = rewriter.create(loc, op.getSource()); - else - ext = rewriter.create(loc, op.getSource(), 0); - rewriter.replaceOpWithNewOp(op, dstType, ext); - return success(); - } - - // Duplicate this rank. - // For example: - // %x = broadcast %y : k-D to n-D, k < n - // becomes: - // %b = broadcast %y : k-D to (n-1)-D - // %x = [%b,%b,%b,%b] : n-D - // becomes: - // %b = [%y,%y] : (n-1)-D - // %x = [%b,%b,%b,%b] : n-D - if (srcRank < dstRank) { - // Duplication. - VectorType resType = - VectorType::get(dstType.getShape().drop_front(), eltType); - Value bcst = - rewriter.create(loc, resType, op.getSource()); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) - result = rewriter.create(loc, bcst, result, d); - rewriter.replaceOp(op, result); - return success(); - } - - // Find non-matching dimension, if any. - assert(srcRank == dstRank); - int64_t m = -1; - for (int64_t r = 0; r < dstRank; r++) - if (srcType.getDimSize(r) != dstType.getDimSize(r)) { - m = r; - break; - } - - // All trailing dimensions are the same. Simply pass through. - if (m == -1) { - rewriter.replaceOp(op, op.getSource()); - return success(); - } - - // Any non-matching dimension forces a stretch along this rank. 
- // For example: - // %x = broadcast %y : vector<4x1x2xf32> to vector<4x2x2xf32> - // becomes: - // %a = broadcast %y[0] : vector<1x2xf32> to vector<2x2xf32> - // %b = broadcast %y[1] : vector<1x2xf32> to vector<2x2xf32> - // %c = broadcast %y[2] : vector<1x2xf32> to vector<2x2xf32> - // %d = broadcast %y[3] : vector<1x2xf32> to vector<2x2xf32> - // %x = [%a,%b,%c,%d] - // becomes: - // %u = broadcast %y[0][0] : vector<2xf32> to vector <2x2xf32> - // %v = broadcast %y[1][0] : vector<2xf32> to vector <2x2xf32> - // %a = [%u, %v] - // .. - // %x = [%a,%b,%c,%d] - VectorType resType = - VectorType::get(dstType.getShape().drop_front(), eltType); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - if (m == 0) { - // Stetch at start. - Value ext = rewriter.create(loc, op.getSource(), 0); - Value bcst = rewriter.create(loc, resType, ext); - for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) - result = rewriter.create(loc, bcst, result, d); - } else { - // Stetch not at start. - for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) { - Value ext = rewriter.create(loc, op.getSource(), d); - Value bcst = rewriter.create(loc, resType, ext); - result = rewriter.create(loc, bcst, result, d); - } - } - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// Given a 'transpose' pattern, prune the rightmost dimensions that are not -/// transposed. -void pruneNonTransposedDims(ArrayRef transpose, - SmallVectorImpl &result) { - size_t numTransposedDims = transpose.size(); - for (size_t transpDim : llvm::reverse(transpose)) { - if (transpDim != numTransposedDims - 1) - break; - numTransposedDims--; - } - - result.append(transpose.begin(), transpose.begin() + numTransposedDims); -} - -/// Progressive lowering of TransposeOp. -/// One: -/// %x = vector.transpose %y, [1, 0] -/// is replaced by: -/// %z = arith.constant dense<0.000000e+00> -/// %0 = vector.extract %y[0, 0] -/// %1 = vector.insert %0, %z [0, 0] -/// .. -/// %x = vector.insert .., .. [.., ..] -class TransposeOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - TransposeOpLowering(vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions) {} - - LogicalResult matchAndRewrite(vector::TransposeOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - - Value input = op.getVector(); - VectorType inputType = op.getSourceVectorType(); - VectorType resType = op.getResultVectorType(); - - // Set up convenience transposition table. - SmallVector transp; - for (auto attr : op.getTransp()) - transp.push_back(attr.cast().getInt()); - - if (vectorTransformOptions.vectorTransposeLowering == - vector::VectorTransposeLowering::Shuffle && - resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) - return rewriter.notifyMatchFailure( - op, "Options specifies lowering to shuffle"); - - // Handle a true 2-D matrix transpose differently when requested. 
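For reference, a sketch of what the "Flat" branch that follows emits for a hypothetical vector<4x8xf32> transpose; the rows/columns attributes mirror the result shape, exactly as the removed code computes them:

```
%0 = vector.shape_cast %arg : vector<4x8xf32> to vector<32xf32>
%1 = vector.flat_transpose %0 {rows = 8 : i32, columns = 4 : i32}
       : vector<32xf32> -> vector<32xf32>
%2 = vector.shape_cast %1 : vector<32xf32> to vector<8x4xf32>
```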
- if (vectorTransformOptions.vectorTransposeLowering == - vector::VectorTransposeLowering::Flat && - resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) { - Type flattenedType = - VectorType::get(resType.getNumElements(), resType.getElementType()); - auto matrix = - rewriter.create(loc, flattenedType, input); - auto rows = rewriter.getI32IntegerAttr(resType.getShape()[0]); - auto columns = rewriter.getI32IntegerAttr(resType.getShape()[1]); - Value trans = rewriter.create( - loc, flattenedType, matrix, rows, columns); - rewriter.replaceOpWithNewOp(op, resType, trans); - return success(); - } - - // Generate unrolled extract/insert ops. We do not unroll the rightmost - // (i.e., highest-order) dimensions that are not transposed and leave them - // in vector form to improve performance. Therefore, we prune those - // dimensions from the shape/transpose data structures used to generate the - // extract/insert ops. - SmallVector prunedTransp; - pruneNonTransposedDims(transp, prunedTransp); - size_t numPrunedDims = transp.size() - prunedTransp.size(); - auto prunedInShape = inputType.getShape().drop_back(numPrunedDims); - auto prunedInStrides = computeStrides(prunedInShape); - - // Generates the extract/insert operations for every scalar/vector element - // of the leftmost transposed dimensions. We traverse every transpose - // element using a linearized index that we delinearize to generate the - // appropriate indices for the extract/insert operations. - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - int64_t numTransposedElements = ShapedType::getNumElements(prunedInShape); - - for (int64_t linearIdx = 0; linearIdx < numTransposedElements; - ++linearIdx) { - auto extractIdxs = delinearize(linearIdx, prunedInStrides); - SmallVector insertIdxs(extractIdxs); - applyPermutationToVector(insertIdxs, prunedTransp); - Value extractOp = - rewriter.create(loc, input, extractIdxs); - result = - rewriter.create(loc, extractOp, result, insertIdxs); - } - - rewriter.replaceOp(op, result); - return success(); - } - -private: - /// Options to control the vector patterns. 
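The shuffle-based alternative defined next flattens the 2-D vector, shuffles with a transposing mask, and unflattens; a sketch for a hypothetical vector<2x3xf32> input:

```
%0 = vector.shape_cast %v : vector<2x3xf32> to vector<6xf32>
%1 = vector.shuffle %0, %0 [0, 3, 1, 4, 2, 5] : vector<6xf32>, vector<6xf32>
%2 = vector.shape_cast %1 : vector<6xf32> to vector<3x2xf32>
```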
- vector::VectorTransformsOptions vectorTransformOptions; -}; - -/// Rewrite a 2-D vector.transpose as a sequence of: -/// vector.shape_cast 2D -> 1D -/// vector.shuffle -/// vector.shape_cast 1D -> 2D -class TransposeOp2DToShuffleLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - TransposeOp2DToShuffleLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions) {} - - LogicalResult matchAndRewrite(vector::TransposeOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - - VectorType srcType = op.getSourceVectorType(); - if (srcType.getRank() != 2) - return rewriter.notifyMatchFailure(op, "Not a 2D transpose"); - - SmallVector transp; - for (auto attr : op.getTransp()) - transp.push_back(attr.cast().getInt()); - if (transp[0] != 1 && transp[1] != 0) - return rewriter.notifyMatchFailure(op, "Not a 2D transpose permutation"); - - if (vectorTransformOptions.vectorTransposeLowering != - VectorTransposeLowering::Shuffle) - return rewriter.notifyMatchFailure(op, "Options do not ask for Shuffle"); - - int64_t m = srcType.getShape().front(), n = srcType.getShape().back(); - Value casted = rewriter.create( - loc, VectorType::get({m * n}, srcType.getElementType()), - op.getVector()); - SmallVector mask; - mask.reserve(m * n); - for (int64_t j = 0; j < n; ++j) - for (int64_t i = 0; i < m; ++i) - mask.push_back(i * n + j); - - Value shuffled = - rewriter.create(loc, casted, casted, mask); - rewriter.replaceOpWithNewOp( - op, op.getResultVectorType(), shuffled); - - return success(); - } - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; -}; - -/// Progressive lowering of OuterProductOp. -/// One: -/// %x = vector.outerproduct %lhs, %rhs, %acc -/// is replaced by: -/// %z = zero-result -/// %0 = vector.extract %lhs[0] -/// %1 = vector.broadcast %0 -/// %2 = vector.extract %acc[0] -/// %3 = vector.fma %1, %rhs, %2 -/// %4 = vector.insert %3, %z[0] -/// .. -/// %x = vector.insert %.., %..[N-1] -/// -class OuterProductOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::OuterProductOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - - VectorType lhsType = op.getOperandVectorTypeLHS(); - VectorType rhsType = op.getOperandTypeRHS().dyn_cast(); - VectorType resType = op.getResultVectorType(); - Type eltType = resType.getElementType(); - bool isInt = eltType.isa(); - Value acc = (op.getAcc().empty()) ? nullptr : op.getAcc()[0]; - vector::CombiningKind kind = op.getKind(); - - // Vector mask setup. - OpBuilder::InsertionGuard guard(rewriter); - auto maskableOp = cast(op.getOperation()); - Operation *rootOp; - Value mask; - if (maskableOp.isMasked()) { - rewriter.setInsertionPoint(maskableOp.getMaskingOp()); - rootOp = maskableOp.getMaskingOp(); - mask = maskableOp.getMaskingOp().getMask(); - } else { - rootOp = op; - } - - if (!rhsType) { - // Special case: AXPY operation. 
- Value b = rewriter.create(loc, lhsType, op.getRhs()); - std::optional mult = createContractArithOp( - loc, op.getLhs(), b, acc, kind, rewriter, isInt, mask); - if (!mult.has_value()) - return failure(); - rewriter.replaceOp(rootOp, *mult); - return success(); - } - - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - for (int64_t d = 0, e = resType.getDimSize(0); d < e; ++d) { - auto pos = rewriter.getI64ArrayAttr(d); - Value x = rewriter.create(loc, op.getLhs(), pos); - Value a = rewriter.create(loc, rhsType, x); - Value r = nullptr; - if (acc) - r = rewriter.create(loc, acc, pos); - Value extrMask; - if (mask) - extrMask = rewriter.create(loc, mask, pos); - - std::optional m = createContractArithOp( - loc, a, op.getRhs(), r, kind, rewriter, isInt, extrMask); - if (!m.has_value()) - return failure(); - result = rewriter.create(loc, resType, *m, result, pos); - } - - rewriter.replaceOp(rootOp, result); - return success(); - } -}; - -/// Lower vector.contract with all size one reduction dimensions to -/// elementwise ops when possible. -struct ContractOpToElementwise - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - using FilterConstraintType = - std::function; - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - ContractOpToElementwise( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - const FilterConstraintType &constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} - - LogicalResult matchAndRewrite(vector::ContractionOp contractOp, - PatternRewriter &rewriter) const override { - // TODO: Support vector.mask. - auto maskableOp = cast(contractOp.getOperation()); - if (maskableOp.isMasked()) - return failure(); - - // TODO: Remove native masks from contraction op? - if (!contractOp.getMasks().empty()) - return failure(); - - if (failed(filter(contractOp))) - return failure(); - - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::ParallelArith) - return failure(); - - ArrayRef lhsShape = contractOp.getLhsType().getShape(); - ArrayRef rhsShape = contractOp.getRhsType().getShape(); - AffineMap lhsMap = contractOp.getIndexingMapsArray()[0]; - AffineMap rhsMap = contractOp.getIndexingMapsArray()[1]; - SmallVector lhsReductionDims = - getReductionIndex(lhsMap, contractOp.getIteratorTypes()); - SmallVector rhsReductionDims = - getReductionIndex(rhsMap, contractOp.getIteratorTypes()); - // All the reduction dimensions must be a size 1. 
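For the AXPY special case above (scalar RHS), the rewrite amounts to a broadcast plus a fused multiply-add; a sketch with illustrative types, assuming an f32 "add" kind with an accumulator:

```
// %res = vector.outerproduct %x, %s, %acc : vector<8xf32>, f32
%b   = vector.broadcast %s : f32 to vector<8xf32>
%res = vector.fma %x, %b, %acc : vector<8xf32>
```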
- for (int64_t dim : lhsReductionDims) { - if (lhsShape[dim] != 1) - return failure(); - } - for (int64_t dim : rhsReductionDims) { - if (rhsShape[dim] != 1) - return failure(); - } - AffineMap accMap = contractOp.getIndexingMapsArray()[2]; - unsigned numParallelDims = accMap.getNumResults(); - unsigned numLhsDimToBroadcast = - numParallelDims - (lhsMap.getNumResults() - lhsReductionDims.size()); - unsigned numRhsDimToBroadcast = - numParallelDims - (rhsMap.getNumResults() - rhsReductionDims.size()); - SmallVector lhsDims; - SmallVector lhsTranspose; - SmallVector rhsDims; - SmallVector rhsTranspose; - for (int64_t dim : lhsReductionDims) - lhsTranspose.push_back(numLhsDimToBroadcast + dim); - for (int64_t dim : rhsReductionDims) - rhsTranspose.push_back(numRhsDimToBroadcast + dim); - // Loop through the parallel dimensions to calculate the dimensions to - // broadcast and to permute in order to extract only parallel dimensions. - for (unsigned i = 0; i < numParallelDims; i++) { - std::optional lhsDim = - getDimPosition(lhsMap, accMap.getDimPosition(i)); - if (lhsDim) { - lhsTranspose.push_back(numLhsDimToBroadcast + *lhsDim); - } else { - // If the parallel dimension doesn't exist we will have to broadcast it. - lhsDims.push_back( - contractOp.getResultType().cast().getDimSize(i)); - lhsTranspose.push_back(lhsDims.size() - 1); - } - std::optional rhsDim = - getDimPosition(rhsMap, accMap.getDimPosition(i)); - if (rhsDim) { - rhsTranspose.push_back(numRhsDimToBroadcast + *rhsDim); - } else { - // If the parallel dimension doesn't exist we will have to broadcast it. - rhsDims.push_back( - contractOp.getResultType().cast().getDimSize(i)); - rhsTranspose.push_back(rhsDims.size() - 1); - } - } - Value newLhs = contractOp.getLhs(); - Value newRhs = contractOp.getRhs(); - Location loc = contractOp.getLoc(); - if (!lhsDims.empty()) { - lhsDims.append(lhsShape.begin(), lhsShape.end()); - auto expandedType = - VectorType::get(lhsDims, contractOp.getLhsType().getElementType()); - newLhs = rewriter.create(loc, expandedType, newLhs); - } - if (!rhsDims.empty()) { - rhsDims.append(rhsShape.begin(), rhsShape.end()); - auto expandedType = - VectorType::get(rhsDims, contractOp.getRhsType().getElementType()); - newRhs = rewriter.create(loc, expandedType, newRhs); - } - bool isInt = contractOp.getLhsType().getElementType().isIntOrIndex(); - newLhs = rewriter.create(loc, newLhs, lhsTranspose); - newRhs = rewriter.create(loc, newRhs, rhsTranspose); - SmallVector lhsOffsets(lhsReductionDims.size(), 0); - SmallVector rhsOffsets(rhsReductionDims.size(), 0); - newLhs = rewriter.create( - loc, newLhs, rewriter.getI64ArrayAttr(lhsOffsets)); - newRhs = rewriter.create( - loc, newRhs, rewriter.getI64ArrayAttr(rhsOffsets)); - std::optional result = - createContractArithOp(loc, newLhs, newRhs, contractOp.getAcc(), - contractOp.getKind(), rewriter, isInt); - rewriter.replaceOp(contractOp, {*result}); - return success(); - } - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of ConstantMaskOp. -/// One: -/// %x = vector.constant_mask [a,b] -/// is replaced by: -/// %z = zero-result -/// %l = vector.constant_mask [b] -/// %4 = vector.insert %l, %z[0] -/// .. -/// %x = vector.insert %l, %..[a-1] -/// until a one-dimensional vector is reached. All these operations -/// will be folded at LLVM IR level. 
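The recursion described above bottoms out in an explicit 1-D mask; concretely, with an illustrative size:

```
// vector.constant_mask [2] : vector<4xi1>
// becomes
%m = arith.constant dense<[true, true, false, false]> : vector<4xi1>
```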
-class ConstantMaskOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ConstantMaskOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - auto dstType = op.getType(); - auto eltType = dstType.getElementType(); - auto dimSizes = op.getMaskDimSizes(); - int64_t rank = dstType.getRank(); - - if (rank == 0) { - assert(dimSizes.size() == 1 && - "Expected exactly one dim size for a 0-D vector"); - bool value = dimSizes[0].cast().getInt() == 1; - rewriter.replaceOpWithNewOp( - op, dstType, - DenseIntElementsAttr::get( - VectorType::get(ArrayRef{}, rewriter.getI1Type()), - ArrayRef{value})); - return success(); - } - - // Scalable constant masks can only be lowered for the "none set" case. - if (dstType.cast().isScalable()) { - rewriter.replaceOpWithNewOp( - op, DenseElementsAttr::get(dstType, false)); - return success(); - } - - int64_t trueDim = std::min(dstType.getDimSize(0), - dimSizes[0].cast().getInt()); - - if (rank == 1) { - // Express constant 1-D case in explicit vector form: - // [T,..,T,F,..,F]. - SmallVector values(dstType.getDimSize(0)); - for (int64_t d = 0; d < trueDim; d++) - values[d] = true; - rewriter.replaceOpWithNewOp( - op, dstType, rewriter.getBoolVectorAttr(values)); - return success(); - } - - VectorType lowType = - VectorType::get(dstType.getShape().drop_front(), eltType); - SmallVector newDimSizes; - for (int64_t r = 1; r < rank; r++) - newDimSizes.push_back(dimSizes[r].cast().getInt()); - Value trueVal = rewriter.create( - loc, lowType, rewriter.getI64ArrayAttr(newDimSizes)); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - for (int64_t d = 0; d < trueDim; d++) { - auto pos = rewriter.getI64ArrayAttr(d); - result = - rewriter.create(loc, dstType, trueVal, result, pos); - } - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// Progressive lowering of CreateMaskOp. -/// One: -/// %x = vector.create_mask %a, ... : vector -/// is replaced by: -/// %l = vector.create_mask ... : vector<...> ; one lower rank -/// %0 = arith.cmpi "slt", %ci, %a | -/// %1 = select %0, %l, %zeroes | -/// %r = vector.insert %1, %pr [i] | d-times -/// %x = .... -/// until a one-dimensional vector is reached. 
-class CreateMaskOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::CreateMaskOp op, - PatternRewriter &rewriter) const override { - auto dstType = op.getResult().getType().cast(); - int64_t rank = dstType.getRank(); - if (rank <= 1) - return rewriter.notifyMatchFailure( - op, "0-D and 1-D vectors are handled separately"); - - auto loc = op.getLoc(); - auto eltType = dstType.getElementType(); - int64_t dim = dstType.getDimSize(0); - Value idx = op.getOperand(0); - - VectorType lowType = - VectorType::get(dstType.getShape().drop_front(), eltType); - Value trueVal = rewriter.create( - loc, lowType, op.getOperands().drop_front()); - Value falseVal = rewriter.create( - loc, lowType, rewriter.getZeroAttr(lowType)); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - for (int64_t d = 0; d < dim; d++) { - Value bnd = - rewriter.create(loc, rewriter.getIndexAttr(d)); - Value val = rewriter.create(loc, arith::CmpIPredicate::slt, - bnd, idx); - Value sel = rewriter.create(loc, val, trueVal, falseVal); - auto pos = rewriter.getI64ArrayAttr(d); - result = - rewriter.create(loc, dstType, sel, result, pos); - } - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D -/// vectors progressively on the way to target llvm.matrix intrinsics. -/// This iterates over the most major dimension of the 2-D vector and performs -/// rewrites into: -/// vector.extract from 2-D + vector.insert_strided_slice offset into 1-D -class ShapeCastOp2DDownCastRewritePattern - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp op, - PatternRewriter &rewriter) const override { - auto sourceVectorType = op.getSourceVectorType(); - auto resultVectorType = op.getResultVectorType(); - if (sourceVectorType.getRank() != 2 || resultVectorType.getRank() != 1) - return failure(); - - auto loc = op.getLoc(); - Value desc = rewriter.create( - loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); - unsigned mostMinorVectorSize = sourceVectorType.getShape()[1]; - for (int64_t i = 0, e = sourceVectorType.getShape().front(); i != e; ++i) { - Value vec = rewriter.create(loc, op.getSource(), i); - desc = rewriter.create( - loc, vec, desc, - /*offsets=*/i * mostMinorVectorSize, /*strides=*/1); - } - rewriter.replaceOp(op, desc); - return success(); - } -}; - -/// ShapeOp 1D -> 2D upcast serves the purpose of unflattening 2-D from 1-D -/// vectors progressively. -/// This iterates over the most major dimension of the 2-D vector and performs -/// rewrites into: -/// vector.extract_strided_slice from 1-D + vector.insert into 2-D -/// Note that 1-D extract_strided_slice are lowered to efficient vector.shuffle. 
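As a concrete instance of the 1-D to 2-D unflattening described above, for a hypothetical vector<8xf32> to vector<2x4xf32> cast:

```
%zero = arith.constant dense<0.000000e+00> : vector<2x4xf32>
%0 = vector.extract_strided_slice %src
       {offsets = [0], sizes = [4], strides = [1]}
       : vector<8xf32> to vector<4xf32>
%1 = vector.insert %0, %zero [0] : vector<4xf32> into vector<2x4xf32>
%2 = vector.extract_strided_slice %src
       {offsets = [4], sizes = [4], strides = [1]}
       : vector<8xf32> to vector<4xf32>
%3 = vector.insert %2, %1 [1] : vector<4xf32> into vector<2x4xf32>
```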
-class ShapeCastOp2DUpCastRewritePattern - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp op, - PatternRewriter &rewriter) const override { - auto sourceVectorType = op.getSourceVectorType(); - auto resultVectorType = op.getResultVectorType(); - if (sourceVectorType.getRank() != 1 || resultVectorType.getRank() != 2) - return failure(); - - auto loc = op.getLoc(); - Value desc = rewriter.create( - loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); - unsigned mostMinorVectorSize = resultVectorType.getShape()[1]; - for (int64_t i = 0, e = resultVectorType.getShape().front(); i != e; ++i) { - Value vec = rewriter.create( - loc, op.getSource(), /*offsets=*/i * mostMinorVectorSize, - /*sizes=*/mostMinorVectorSize, - /*strides=*/1); - desc = rewriter.create(loc, vec, desc, i); - } - rewriter.replaceOp(op, desc); - return success(); - } -}; - -// We typically should not lower general shape cast operations into data -// movement instructions, since the assumption is that these casts are -// optimized away during progressive lowering. For completeness, however, -// we fall back to a reference implementation that moves all elements -// into the right place if we get here. -class ShapeCastOpRewritePattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp op, - PatternRewriter &rewriter) const override { - Location loc = op.getLoc(); - auto sourceVectorType = op.getSourceVectorType(); - auto resultVectorType = op.getResultVectorType(); - - // Special case 2D/1D lowerings with better implementations. - // TODO: make is ND/1D to allow generic ND->1D->MD. - int64_t srcRank = sourceVectorType.getRank(); - int64_t resRank = resultVectorType.getRank(); - if ((srcRank == 2 && resRank == 1) || (srcRank == 1 && resRank == 2)) - return failure(); - - // Generic ShapeCast lowering path goes all the way down to unrolled scalar - // extract/insert chains. - // TODO: consider evolving the semantics to only allow 1D source or dest and - // drop this potentially very expensive lowering. - // Compute number of elements involved in the reshape. - int64_t numElts = 1; - for (int64_t r = 0; r < srcRank; r++) - numElts *= sourceVectorType.getDimSize(r); - // Replace with data movement operations: - // x[0,0,0] = y[0,0] - // x[0,0,1] = y[0,1] - // x[0,1,0] = y[0,2] - // etc., incrementing the two index vectors "row-major" - // within the source and result shape. - SmallVector srcIdx(srcRank); - SmallVector resIdx(resRank); - Value result = rewriter.create( - loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); - for (int64_t i = 0; i < numElts; i++) { - if (i != 0) { - incIdx(srcIdx, sourceVectorType, srcRank - 1); - incIdx(resIdx, resultVectorType, resRank - 1); - } - Value e = rewriter.create(loc, op.getSource(), srcIdx); - result = rewriter.create(loc, e, result, resIdx); - } - rewriter.replaceOp(op, result); - return success(); - } - -private: - static void incIdx(SmallVector &idx, VectorType tp, int64_t r) { - assert(0 <= r && r < tp.getRank()); - if (++idx[r] == tp.getDimSize(r)) { - idx[r] = 0; - incIdx(idx, tp, r - 1); - } - } -}; - /// Convert MulIOp/MulFOp + MultiDimReductionOp into ContractionOp. /// Ex: /// ``` @@ -1425,967 +550,6 @@ struct ReorderElementwiseOpsOnTranspose final } }; -} // namespace - -/// Creates an AddIOp if `isInt` is true otherwise create an arith::AddFOp using -/// operands `x` and `y`. 
-static Value createAdd(Location loc, Value x, Value y, bool isInt, - PatternRewriter &rewriter) { - if (isInt) - return rewriter.create(loc, x, y); - return rewriter.create(loc, x, y); -} - -/// Creates a MulIOp if `isInt` is true otherwise create an MulFOp using -/// operands `x and `y`. -static Value createMul(Location loc, Value x, Value y, bool isInt, - PatternRewriter &rewriter) { - if (isInt) - return rewriter.create(loc, x, y); - return rewriter.create(loc, x, y); -} - -namespace mlir { - -/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to: -/// ``` -/// %mta = maybe_transpose -/// %mtb = maybe_transpose -/// %flattened_a = vector.shape_cast %mta -/// %flattened_b = vector.shape_cast %mtb -/// %flattened_d = vector.matmul %flattened_a, %flattened_b -/// %mtd = vector.shape_cast %flattened_d -/// %d = maybe_untranspose %mtd -/// %e = add %c, %d -/// ``` -/// `vector.matmul` later lowers to `llvm.matrix.multiply`. -// -/// This only kicks in when VectorTransformsOptions is set to `Matmul`. -/// vector.transpose operations are inserted if the vector.contract op is not a -/// row-major matrix multiply. -LogicalResult -ContractionOpToMatmulOpLowering::matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rew) const { - // TODO: Support vector.mask. - auto maskableOp = cast(op.getOperation()); - if (maskableOp.isMasked()) - return failure(); - - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::Matmul) - return failure(); - if (failed(filter(op))) - return failure(); - - auto iteratorTypes = op.getIteratorTypes().getValue(); - if (!isParallelIterator(iteratorTypes[0]) || - !isParallelIterator(iteratorTypes[1]) || - !isReductionIterator(iteratorTypes[2])) - return failure(); - - Type elementType = op.getLhsType().getElementType(); - if (!elementType.isIntOrFloat()) - return failure(); - - Type dstElementType = op.getType(); - if (auto vecType = dstElementType.dyn_cast()) - dstElementType = vecType.getElementType(); - if (elementType != dstElementType) - return failure(); - - // Perform lhs + rhs transpositions to conform to matmul row-major semantics. - // Bail out if the contraction cannot be put in this form. - MLIRContext *ctx = op.getContext(); - Location loc = op.getLoc(); - AffineExpr m, n, k; - bindDims(rew.getContext(), m, n, k); - // LHS must be A(m, k) or A(k, m). - Value lhs = op.getLhs(); - auto lhsMap = op.getIndexingMapsArray()[0]; - if (lhsMap == AffineMap::get(3, 0, {k, m}, ctx)) - lhs = rew.create(loc, lhs, ArrayRef{1, 0}); - else if (lhsMap != AffineMap::get(3, 0, {m, k}, ctx)) - return failure(); - - // RHS must be B(k, n) or B(n, k). - Value rhs = op.getRhs(); - auto rhsMap = op.getIndexingMapsArray()[1]; - if (rhsMap == AffineMap::get(3, 0, {n, k}, ctx)) - rhs = rew.create(loc, rhs, ArrayRef{1, 0}); - else if (rhsMap != AffineMap::get(3, 0, {k, n}, ctx)) - return failure(); - - // At this point lhs and rhs are in row-major. 
- VectorType lhsType = lhs.getType().cast(); - VectorType rhsType = rhs.getType().cast(); - int64_t lhsRows = lhsType.getDimSize(0); - int64_t lhsColumns = lhsType.getDimSize(1); - int64_t rhsColumns = rhsType.getDimSize(1); - - Type flattenedLHSType = - VectorType::get(lhsType.getNumElements(), lhsType.getElementType()); - lhs = rew.create(loc, flattenedLHSType, lhs); - - Type flattenedRHSType = - VectorType::get(rhsType.getNumElements(), rhsType.getElementType()); - rhs = rew.create(loc, flattenedRHSType, rhs); - - Value mul = rew.create(loc, lhs, rhs, lhsRows, lhsColumns, - rhsColumns); - mul = rew.create( - loc, - VectorType::get({lhsRows, rhsColumns}, - getElementTypeOrSelf(op.getAcc().getType())), - mul); - - // ACC must be C(m, n) or C(n, m). - auto accMap = op.getIndexingMapsArray()[2]; - if (accMap == AffineMap::get(3, 0, {n, m}, ctx)) - mul = rew.create(loc, mul, ArrayRef{1, 0}); - else if (accMap != AffineMap::get(3, 0, {m, n}, ctx)) - llvm_unreachable("invalid contraction semantics"); - - Value res = - elementType.isa() - ? static_cast(rew.create(loc, op.getAcc(), mul)) - : static_cast( - rew.create(loc, op.getAcc(), mul)); - - rew.replaceOp(op, res); - return success(); -} - -namespace { - -/// Generate a vector implementation for matmat, matvec and tmatvec. -/// This unrolls outer-products along the reduction dimension. -struct UnrolledOuterProductGenerator - : public StructuredGenerator { - UnrolledOuterProductGenerator(RewriterBase &b, vector::ContractionOp op) - : StructuredGenerator(b, op), - kind(op.getKind()), lhs(op.getLhs()), rhs(op.getRhs()), - res(op.getAcc()), lhsType(op.getLhsType()) { - auto maskableOp = cast(op.getOperation()); - if (maskableOp.isMasked()) - mask = maskableOp.getMaskingOp().getMask(); - } - - Value t(Value v, ArrayRef perm = {1, 0}) { - if (!v) - return v; - return rewriter.create(loc, v, perm); - } - - Value promote(Value v, Type dstElementType) { - Type elementType = v.getType(); - auto vecType = elementType.dyn_cast(); - if (vecType) - elementType = vecType.getElementType(); - if (elementType == dstElementType) - return v; - Type promotedType = dstElementType; - if (vecType) - promotedType = VectorType::get(vecType.getShape(), promotedType); - if (dstElementType.isa()) - return rewriter.create(loc, promotedType, v); - return rewriter.create(loc, promotedType, v); - } - - FailureOr outerProd(Value lhs, Value rhs, Value res, int reductionSize, - std::optional maybeMask = std::nullopt) { - assert(reductionSize > 0); - // Incremental support for masking. - if (mask && !maybeMask.has_value()) - return failure(); - - Type resElementType = res.getType().cast().getElementType(); - for (int64_t k = 0; k < reductionSize; ++k) { - Value extractA = rewriter.create(loc, lhs, k); - Value extractB = rewriter.create(loc, rhs, k); - extractA = promote(extractA, resElementType); - extractB = promote(extractB, resElementType); - Value extractMask; - if (maybeMask.has_value() && maybeMask.value()) - extractMask = - rewriter.create(loc, maybeMask.value(), k); - - Operation *outerProdOp = rewriter.create( - loc, res.getType(), extractA, extractB, res, kind); - res = maskOperation(rewriter, outerProdOp, extractMask)->getResult(0); - } - return res; - } - - /// Two outer parallel, one inner reduction (matmat flavor). - FailureOr matmat() { - if (!iters({Par(), Par(), Red()})) - return failure(); - // Set up the parallel/reduction structure in the right form. 
- AffineExpr m, n, k; - bindDims(rewriter.getContext(), m, n, k); - // Classical row-major matmul: Just permute the lhs. - if (layout({{m, k}, {k, n}, {m, n}})) - return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), - t(mask, {2, 0, 1})); - // TODO: may be better to fail and use some vector -> scalar reduction. - if (layout({{m, k}, {n, k}, {m, n}})) { - Value tlhs = t(lhs); - return outerProd(tlhs, t(rhs), res, lhsType.getDimSize(1)); - } - // No need to permute anything. - if (layout({{k, m}, {k, n}, {m, n}})) - return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); - // Just permute the rhs. - if (layout({{k, m}, {n, k}, {m, n}})) - return outerProd(lhs, t(rhs), res, lhsType.getDimSize(0)); - // Transposed output: swap RHS and LHS. - // Classical row-major matmul: permute the lhs. - if (layout({{m, k}, {k, n}, {n, m}})) - return outerProd(rhs, t(lhs), res, lhsType.getDimSize(1)); - // TODO: may be better to fail and use some vector -> scalar reduction. - if (layout({{m, k}, {n, k}, {n, m}})) { - Value trhs = t(rhs); - return outerProd(trhs, t(lhs), res, lhsType.getDimSize(1)); - } - if (layout({{k, m}, {k, n}, {n, m}})) - return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); - if (layout({{k, m}, {n, k}, {n, m}})) - return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); - return failure(); - } - - /// One outer parallel, one inner reduction (matvec flavor) - FailureOr matvec() { - if (!iters({Par(), Red()})) - return failure(); - AffineExpr m, k; - bindDims(rewriter.getContext(), m, k); - - // Case mat-vec: transpose. - if (layout({{m, k}, {k}, {m}})) - return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), t(mask)); - // Case mat-trans-vec: ready to go. - if (layout({{k, m}, {k}, {m}})) - return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); - // Case vec-mat: swap and transpose. - if (layout({{k}, {m, k}, {m}})) - return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); - // Case vec-mat-trans: swap and ready to go. - if (layout({{k}, {k, m}, {m}})) - return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); - return failure(); - } - - // - // One outer reduction, one inner parallel (tmatvec flavor) - // - FailureOr tmatvec() { - if (!iters({Red(), Par()})) - return failure(); - AffineExpr k, m; - bindDims(rewriter.getContext(), k, m); - - // Case mat-vec: transpose. - if (layout({{m, k}, {k}, {m}})) - return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1)); - // Case mat-trans-vec: ready to go. - if (layout({{k, m}, {k}, {m}})) - return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); - // Case vec-mat: swap and transpose. - if (layout({{k}, {m, k}, {m}})) - return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); - // Case vec-mat-trans: swap and ready to go. - if (layout({{k}, {k, m}, {m}})) - return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); - return failure(); - } - -private: - vector::CombiningKind kind; - Value lhs, rhs, res, mask; - VectorType lhsType; -}; -} // namespace - -/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to a reduction_size-unrolled sequence: -/// ``` -/// %at = vector.transpose %a, [1, 0] -/// %bRow0 = vector.extract %b[0] -/// %atRow0 = vector.extract %at[0] -/// %c0 = vector.outerproduct %atRow0, %bRow0, %c -/// ... 
-/// %bRowK = vector.extract %b[K] -/// %atRowK = vector.extract %at[K] -/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 -/// ``` -/// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct but -/// otherwise supports any layout permutation of the matrix-multiply. -LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite( - vector::ContractionOp op, PatternRewriter &rewriter) const { - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::OuterProduct) - return failure(); - - if (failed(filter(op))) - return failure(); - - // Vector mask setup. - OpBuilder::InsertionGuard guard(rewriter); - auto maskableOp = cast(op.getOperation()); - Operation *rootOp; - if (maskableOp.isMasked()) { - rewriter.setInsertionPoint(maskableOp.getMaskingOp()); - rootOp = maskableOp.getMaskingOp(); - } else { - rootOp = op; - } - - UnrolledOuterProductGenerator e(rewriter, op); - FailureOr matmatRes = e.matmat(); - if (succeeded(matmatRes)) { - rewriter.replaceOp(rootOp, *matmatRes); - return success(); - } - FailureOr matvecRes = e.matvec(); - if (succeeded(matvecRes)) { - rewriter.replaceOp(rootOp, *matvecRes); - return success(); - } - FailureOr tmatvecRes = e.tmatvec(); - if (succeeded(tmatvecRes)) { - rewriter.replaceOp(rootOp, *tmatvecRes); - return success(); - } - - return failure(); -} - -LogicalResult -ContractionOpToDotLowering::matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const { - // TODO: Support vector.mask. - auto maskableOp = cast(op.getOperation()); - if (maskableOp.isMasked()) - return failure(); - - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - - if (failed(filter(op))) - return failure(); - - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::Dot) - return failure(); - - auto iteratorTypes = op.getIteratorTypes().getValue(); - static constexpr std::array perm = {1, 0}; - Location loc = op.getLoc(); - Value lhs = op.getLhs(), rhs = op.getRhs(); - - using MapList = ArrayRef>; - auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; - AffineExpr m, n, k; - bindDims(rewriter.getContext(), m, n, k); - SmallVector maps = op.getIndexingMapsArray(); - // - // In the following we wish to make the reduction dimension innermost so we - // can load vectors and just fmul + reduce into a scalar. - // - if (isParallelIterator(iteratorTypes[0]) && - isParallelIterator(iteratorTypes[1]) && - isReductionIterator(iteratorTypes[2])) { - // - // Two outer parallel, one inner reduction (matmat flavor). - // - if (maps == infer({{m, k}, {k, n}, {m, n}})) { - rhs = rewriter.create(loc, rhs, perm); - } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { - // No need to permute anything. - } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { - lhs = rewriter.create(loc, lhs, perm); - rhs = rewriter.create(loc, rhs, perm); - } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { - // This is the classical row-major matmul. Just permute the lhs. 
- Value tmp = lhs; - lhs = rewriter.create(loc, rhs, perm); - rhs = tmp; - } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { - std::swap(lhs, rhs); - } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { - Value tmp = lhs; - lhs = rewriter.create(loc, rhs, perm); - rhs = rewriter.create(loc, tmp, perm); - } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { - Value tmp = rhs; - rhs = rewriter.create(loc, lhs, perm); - lhs = tmp; - } else { - return failure(); - } - } else if (isParallelIterator(iteratorTypes[0]) && - isReductionIterator(iteratorTypes[1])) { - // - // One outer parallel, one inner reduction (matvec flavor) - // - if (maps == infer({{m, n}, {n}, {m}})) { - // No need to permute anything. - } else if (maps == infer({{n, m}, {n}, {m}})) { - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{n}, {m, n}, {m}})) { - std::swap(lhs, rhs); - } else if (maps == infer({{n}, {n, m}, {m}})) { - std::swap(lhs, rhs); - lhs = rewriter.create(loc, lhs, perm); - } else { - return failure(); - } - } else { - return failure(); - } - - VectorType dstType = op.getResultType().cast(); - assert(dstType.getRank() >= 1 && dstType.getRank() <= 2 && - "Expected dst type of rank 1 or 2"); - - unsigned rank = dstType.getRank(); - unsigned dstRows = dstType.getShape()[0]; - unsigned dstColumns = rank == 1 ? 1 : dstType.getShape()[1]; - - // ExtractOp does not allow dynamic indexing, we must unroll explicitly. - Value res = rewriter.create(loc, dstType, - rewriter.getZeroAttr(dstType)); - bool isInt = dstType.getElementType().isa(); - for (unsigned r = 0; r < dstRows; ++r) { - Value a = rewriter.create(op.getLoc(), lhs, r); - for (unsigned c = 0; c < dstColumns; ++c) { - Value b = rank == 1 - ? rhs - : rewriter.create(op.getLoc(), rhs, c); - Value m = createMul(op.getLoc(), a, b, isInt, rewriter); - Value reduced = rewriter.create( - op.getLoc(), vector::CombiningKind::ADD, m); - - SmallVector pos = rank == 1 ? SmallVector{r} - : SmallVector{r, c}; - res = rewriter.create(op.getLoc(), reduced, res, pos); - } - } - if (auto acc = op.getAcc()) - res = createAdd(op.getLoc(), res, acc, isInt, rewriter); - rewriter.replaceOp(op, res); - return success(); -} - -/// Progressive lowering of ContractionOp. -/// One: -/// %x = vector.contract with at least one free/batch dimension -/// is replaced by: -/// %a = vector.contract with one less free/batch dimension -/// %b = vector.contract with one less free/batch dimension -/// .. -/// %x = combine %a %b .. -/// until a pure contraction is reached (no free/batch dimensions), -/// which is replaced by a dot-product. -/// -/// This only kicks in when either VectorTransformsOptions is set -/// to DOT or when other contraction patterns fail. -// -// TODO: break down into transpose/reshape/cast ops -// when they become available to avoid code dup -// TODO: investigate lowering order impact on performance -LogicalResult -ContractionOpLowering::matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const { - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - - if (failed(filter(op))) - return failure(); - - // TODO: support mixed mode contract lowering. - if (op.getLhsType().getElementType() != - getElementTypeOrSelf(op.getAccType()) || - op.getRhsType().getElementType() != getElementTypeOrSelf(op.getAccType())) - return failure(); - - // TODO: the code below assumes the default contraction, make sure it supports - // other kinds before enabling this lowering. 
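In the Dot lowering above, each result element is produced by an elementwise multiply followed by an accumulating reduction; a sketch of one matvec row, with illustrative names and types:

```
%row = vector.extract %lhs[0] : vector<2x4xf32>
%mul = arith.mulf %row, %rhs : vector<4xf32>
%red = vector.reduction <add>, %mul : vector<4xf32> into f32
%res = vector.insert %red, %zero [0] : f32 into vector<2xf32>
```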
- if (op.getKind() != vector::CombiningKind::ADD) { - return rewriter.notifyMatchFailure( - op, "contractions other than 'add' not supported"); - } - - // TODO: implement benefits, cost models. - MLIRContext *ctx = op.getContext(); - ContractionOpToMatmulOpLowering pat1(vectorTransformOptions, ctx); - if (succeeded(pat1.matchAndRewrite(op, rewriter))) - return success(); - ContractionOpToOuterProductOpLowering pat2(vectorTransformOptions, ctx); - if (succeeded(pat2.matchAndRewrite(op, rewriter))) - return success(); - ContractionOpToDotLowering pat3(vectorTransformOptions, ctx); - if (succeeded(pat3.matchAndRewrite(op, rewriter))) - return success(); - ContractOpToElementwise pat4(vectorTransformOptions, ctx); - if (succeeded(pat4.matchAndRewrite(op, rewriter))) - return success(); - - // Vector mask setup. - OpBuilder::InsertionGuard guard(rewriter); - Operation *rootOp = op; - Value mask; - if (op.isMasked()) { - rewriter.setInsertionPoint(op.getMaskingOp()); - rootOp = op.getMaskingOp(); - mask = op.getMaskingOp().getMask(); - } - - // Find first batch dimension in LHS/RHS, and lower when found. - std::vector> batchDimMap = op.getBatchDimMap(); - if (!batchDimMap.empty()) { - int64_t lhsIndex = batchDimMap[0].first; - int64_t rhsIndex = batchDimMap[0].second; - auto newOp = lowerParallel(rewriter, op, lhsIndex, rhsIndex, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - - // Collect contracting dimensions. - std::vector> contractingDimMap = - op.getContractingDimMap(); - DenseSet lhsContractingDimSet; - DenseSet rhsContractingDimSet; - for (auto &dimPair : contractingDimMap) { - lhsContractingDimSet.insert(dimPair.first); - rhsContractingDimSet.insert(dimPair.second); - } - - // Find first free dimension in LHS, and lower when found. - VectorType lhsType = op.getLhsType(); - for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e; ++lhsIndex) { - if (lhsContractingDimSet.count(lhsIndex) == 0) { - auto newOp = lowerParallel(rewriter, op, lhsIndex, /*rhsIndex=*/-1, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - } - - // Find first free dimension in RHS, and lower when found. - VectorType rhsType = op.getRhsType(); - for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e; ++rhsIndex) { - if (rhsContractingDimSet.count(rhsIndex) == 0) { - auto newOp = lowerParallel(rewriter, op, /*lhsIndex=*/-1, rhsIndex, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - } - - // Lower the first remaining reduction dimension. - if (!contractingDimMap.empty()) { - auto newOp = lowerReduction(rewriter, op, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - - return failure(); -} - -// Lower one parallel dimension. -// Incidentally also tolerates unit-size (hence trivial) reduction dimensions. -// TODO: consider reusing existing contract unrolling -FailureOr ContractionOpLowering::lowerParallel(PatternRewriter &rewriter, - vector::ContractionOp op, - int64_t lhsIndex, - int64_t rhsIndex, - Value mask) const { - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - VectorType resType = op.getResultType().cast(); - // Find the iterator type index and result index. 
- SmallVector iMap = op.getIndexingMapsArray(); - int64_t iterIndex = -1; - int64_t dimSize = -1; - if (lhsIndex >= 0) { - iterIndex = iMap[0].getDimPosition(lhsIndex); - if (rhsIndex >= 0 && iterIndex != iMap[1].getDimPosition(rhsIndex)) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected lhsIndex=" << lhsIndex << " and rhsIndex=" << rhsIndex - << " to map to the same dimension"; - }); - dimSize = lhsType.getDimSize(lhsIndex); - } else if (rhsIndex >= 0) { - iterIndex = iMap[1].getDimPosition(rhsIndex); - dimSize = rhsType.getDimSize(rhsIndex); - } - if (iterIndex < 0) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected either lhsIndex=" << lhsIndex - << " or rhsIndex=" << rhsIndex << " to be nonnegative"; - }); - // value_or(-1) means that we tolerate a dimension not appearing - // in the result map. That can't happen for actual parallel iterators, but - // the caller ContractionOpLowering::matchAndRewrite is currently calling - // lowerParallel also for the case of unit-size reduction dims appearing only - // on one of LHS or RHS, not both. At the moment, such cases are created by - // CastAwayContractionLeadingOneDim, so we need to either support that or - // modify that pattern. - int64_t resIndex = getResultIndex(iMap[2], iterIndex).value_or(-1); - if (resIndex == -1 && dimSize != 1) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected the dimension for iterIndex=" << iterIndex - << " to either appear in the result map, or to be a unit dimension"; - }); - - // Construct new iterator types and affine map array attribute. - std::array lowIndexingMaps = { - adjustMap(iMap[0], iterIndex, rewriter), - adjustMap(iMap[1], iterIndex, rewriter), - adjustMap(iMap[2], iterIndex, rewriter)}; - auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); - auto lowIter = - rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); - // Unroll into a series of lower dimensional vector.contract ops. - Location loc = op.getLoc(); - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - - for (int64_t d = 0; d < dimSize; ++d) { - auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); - auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); - auto acc = reshapeLoad(loc, op.getAcc(), resType, resIndex, d, rewriter); - - Value lowMask; - if (mask) - lowMask = reshapeLoad(loc, mask, cast(mask.getType()), - iterIndex, d, rewriter); - - Operation *lowContract = rewriter.create( - loc, lhs, rhs, acc, lowAffine, lowIter); - lowContract = maskOperation(rewriter, lowContract, lowMask); - result = reshapeStore(loc, lowContract->getResult(0), result, resType, - resIndex, d, rewriter); - } - return result; -} - -// Lower one reduction dimension. -FailureOr ContractionOpLowering::lowerReduction( - PatternRewriter &rewriter, vector::ContractionOp op, Value mask) const { - auto loc = op.getLoc(); - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - Type resType = op.getResultType(); - if (resType.isa()) - return rewriter.notifyMatchFailure(op, - "did not expect a VectorType result"); - bool isInt = resType.isa(); - // Use iterator index 0. 
- int64_t iterIndex = 0; - SmallVector iMap = op.getIndexingMapsArray(); - std::optional lookupLhs = getResultIndex(iMap[0], iterIndex); - std::optional lookupRhs = getResultIndex(iMap[1], iterIndex); - if (!lookupLhs.has_value()) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected iterIndex=" << iterIndex << "to map to a LHS dimension"; - }); - if (!lookupRhs.has_value()) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected iterIndex=" << iterIndex << "to map to a RHS dimension"; - }); - int64_t lhsIndex = *lookupLhs; - int64_t rhsIndex = *lookupRhs; - int64_t dimSize = lhsType.getDimSize(lhsIndex); - if (dimSize != rhsType.getDimSize(rhsIndex)) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expect LHS dimension " << lhsIndex - << " to have the same size as RHS dimension " << rhsIndex; - }); - // Base case. - if (lhsType.getRank() == 1) { - if (rhsType.getRank() != 1) - return rewriter.notifyMatchFailure( - op, "When LHS has rank 1, expected also RHS to have rank 1"); - Value m = createMul(loc, op.getLhs(), op.getRhs(), isInt, rewriter); - auto kind = vector::CombiningKind::ADD; - - Value acc = op.getAcc(); - Operation *reductionOp = - acc ? rewriter.create(loc, kind, m, acc) - : rewriter.create(loc, kind, m); - return maskOperation(rewriter, reductionOp, mask)->getResult(0); - } - // Construct new iterator types and affine map array attribute. - std::array lowIndexingMaps = { - adjustMap(iMap[0], iterIndex, rewriter), - adjustMap(iMap[1], iterIndex, rewriter), - adjustMap(iMap[2], iterIndex, rewriter)}; - auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); - auto lowIter = - rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); - // Unroll into a series of lower dimensional vector.contract ops. - // By feeding the initial accumulator into the first contraction, - // and the result of each contraction into the next, eventually - // the sum of all reductions is computed. - Value result = op.getAcc(); - for (int64_t d = 0; d < dimSize; ++d) { - auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); - auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); - Value newMask; - if (mask) - newMask = reshapeLoad(loc, mask, cast(mask.getType()), - iterIndex, d, rewriter); - - Operation *newContract = rewriter.create( - loc, lhs, rhs, result, lowAffine, lowIter); - result = maskOperation(rewriter, newContract, newMask)->getResult(0); - } - return result; -} - -} // namespace mlir - -/// Progressive lowering of transfer_read. This pattern supports lowering of -/// `vector.transfer_read` to a combination of `vector.load` and -/// `vector.broadcast` if all of the following hold: -/// - Stride of most minor memref dimension must be 1. -/// - Out-of-bounds masking is not required. -/// - If the memref's element type is a vector type then it coincides with the -/// result type. -/// - The permutation map doesn't perform permutation (broadcasting is allowed). 
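Concretely, a qualifying in-bounds, minor-identity read turns into a plain load (masked reads become vector.maskedload instead); types are illustrative:

```
// Before:
%v = vector.transfer_read %mem[%i, %j], %pad {in_bounds = [true]}
       : memref<8x16xf32>, vector<16xf32>
// After:
%v = vector.load %mem[%i, %j] : memref<8x16xf32>, vector<16xf32>
```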
-struct TransferReadToVectorLoadLowering - : public OpRewritePattern { - TransferReadToVectorLoadLowering(MLIRContext *context, - std::optional maxRank, - PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - maxTransferRank(maxRank) {} - - LogicalResult matchAndRewrite(vector::TransferReadOp read, - PatternRewriter &rewriter) const override { - if (maxTransferRank && read.getVectorType().getRank() > *maxTransferRank) - return failure(); - - SmallVector broadcastedDims; - // Permutations are handled by VectorToSCF or - // populateVectorTransferPermutationMapLoweringPatterns. - // We let the 0-d corner case pass-through as it is supported. - if (!read.getPermutationMap().isMinorIdentityWithBroadcasting( - &broadcastedDims)) - return failure(); - - auto memRefType = read.getShapedType().dyn_cast(); - if (!memRefType) - return failure(); - - // Non-unit strides are handled by VectorToSCF. - if (!vector::isLastMemrefDimUnitStride(memRefType)) - return failure(); - - // If there is broadcasting involved then we first load the unbroadcasted - // vector, and then broadcast it with `vector.broadcast`. - ArrayRef vectorShape = read.getVectorType().getShape(); - SmallVector unbroadcastedVectorShape(vectorShape.begin(), - vectorShape.end()); - for (unsigned i : broadcastedDims) - unbroadcastedVectorShape[i] = 1; - VectorType unbroadcastedVectorType = VectorType::get( - unbroadcastedVectorShape, read.getVectorType().getElementType()); - - // `vector.load` supports vector types as memref's elements only when the - // resulting vector type is the same as the element type. - auto memrefElTy = memRefType.getElementType(); - if (memrefElTy.isa() && memrefElTy != unbroadcastedVectorType) - return failure(); - - // Otherwise, element types of the memref and the vector must match. - if (!memrefElTy.isa() && - memrefElTy != read.getVectorType().getElementType()) - return failure(); - - // Out-of-bounds dims are handled by MaterializeTransferMask. - if (read.hasOutOfBoundsDim()) - return failure(); - - // Create vector load op. - Operation *loadOp; - if (read.getMask()) { - Value fill = rewriter.create( - read.getLoc(), unbroadcastedVectorType, read.getPadding()); - loadOp = rewriter.create( - read.getLoc(), unbroadcastedVectorType, read.getSource(), - read.getIndices(), read.getMask(), fill); - } else { - loadOp = rewriter.create( - read.getLoc(), unbroadcastedVectorType, read.getSource(), - read.getIndices()); - } - - // Insert a broadcasting op if required. - if (!broadcastedDims.empty()) { - rewriter.replaceOpWithNewOp( - read, read.getVectorType(), loadOp->getResult(0)); - } else { - rewriter.replaceOp(read, loadOp->getResult(0)); - } - - return success(); - } - - std::optional maxTransferRank; -}; - -/// Replace a 0-d vector.load with a memref.load + vector.broadcast. -// TODO: we shouldn't cross the vector/scalar domains just for this -// but atm we lack the infra to avoid it. Possible solutions include: -// - go directly to LLVM + bitcast -// - introduce a bitcast op and likely a new pointer dialect -// - let memref.load/store additionally support the 0-d vector case -// There are still deeper data layout issues lingering even in this -// trivial case (for architectures for which this matters). 
-struct VectorLoadToMemrefLoadLowering - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::LoadOp loadOp, - PatternRewriter &rewriter) const override { - auto vecType = loadOp.getVectorType(); - if (vecType.getNumElements() != 1) - return failure(); - auto memrefLoad = rewriter.create( - loadOp.getLoc(), loadOp.getBase(), loadOp.getIndices()); - rewriter.replaceOpWithNewOp(loadOp, vecType, - memrefLoad); - return success(); - } -}; - -/// Replace a 0-d vector.store with a vector.extractelement + memref.store. -struct VectorStoreToMemrefStoreLowering - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::StoreOp storeOp, - PatternRewriter &rewriter) const override { - auto vecType = storeOp.getVectorType(); - if (vecType.getNumElements() != 1) - return failure(); - Value extracted; - if (vecType.getRank() == 0) { - // TODO: Unifiy once ExtractOp supports 0-d vectors. - extracted = rewriter.create( - storeOp.getLoc(), storeOp.getValueToStore()); - } else { - SmallVector indices(vecType.getRank(), 0); - extracted = rewriter.create( - storeOp.getLoc(), storeOp.getValueToStore(), indices); - } - - rewriter.replaceOpWithNewOp( - storeOp, extracted, storeOp.getBase(), storeOp.getIndices()); - return success(); - } -}; - -/// Progressive lowering of transfer_write. This pattern supports lowering of -/// `vector.transfer_write` to `vector.store` if all of the following hold: -/// - Stride of most minor memref dimension must be 1. -/// - Out-of-bounds masking is not required. -/// - If the memref's element type is a vector type then it coincides with the -/// type of the written value. -/// - The permutation map is the minor identity map (neither permutation nor -/// broadcasting is allowed). -struct TransferWriteToVectorStoreLowering - : public OpRewritePattern { - TransferWriteToVectorStoreLowering(MLIRContext *context, - std::optional maxRank, - PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - maxTransferRank(maxRank) {} - - LogicalResult matchAndRewrite(vector::TransferWriteOp write, - PatternRewriter &rewriter) const override { - if (maxTransferRank && write.getVectorType().getRank() > *maxTransferRank) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "rank exceeds maxTransferRank: " << write; - }); - - // Permutations are handled by VectorToSCF or - // populateVectorTransferPermutationMapLoweringPatterns. - if ( // pass-through for the 0-d corner case. - !write.getPermutationMap().isMinorIdentity()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "permutation map is not minor identity: " << write; - }); - - auto memRefType = write.getShapedType().dyn_cast(); - if (!memRefType) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "not a memref type: " << write; - }); - - // Non-unit strides are handled by VectorToSCF. - if (!vector::isLastMemrefDimUnitStride(memRefType)) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "most minor stride is not 1: " << write; - }); - - // `vector.store` supports vector types as memref's elements only when the - // type of the vector value being written is the same as the element type. 
- auto memrefElTy = memRefType.getElementType(); - if (memrefElTy.isa() && memrefElTy != write.getVectorType()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "elemental type mismatch: " << write; - }); - - // Otherwise, element types of the memref and the vector must match. - if (!memrefElTy.isa() && - memrefElTy != write.getVectorType().getElementType()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "elemental type mismatch: " << write; - }); - - // Out-of-bounds dims are handled by MaterializeTransferMask. - if (write.hasOutOfBoundsDim()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "out of bounds dim: " << write; - }); - if (write.getMask()) { - rewriter.replaceOpWithNewOp( - write, write.getSource(), write.getIndices(), write.getMask(), - write.getVector()); - } else { - rewriter.replaceOpWithNewOp( - write, write.getVector(), write.getSource(), write.getIndices()); - } - return success(); - } - - std::optional maxTransferRank; -}; - // Returns the values in `arrayAttr` as an integer vector. static SmallVector getIntValueVector(ArrayAttr arrayAttr) { return llvm::to_vector<4>( @@ -2863,202 +1027,6 @@ class DropInnerMostUnitDims : public OpRewritePattern { } }; -namespace { - -/// This function checks to see if the vector combining kind -/// is consistent with the integer or float element type. -static bool isValidKind(bool isInt, vector::CombiningKind kind) { - using vector::CombiningKind; - enum class KindType { FLOAT, INT, INVALID }; - KindType type{KindType::INVALID}; - switch (kind) { - case CombiningKind::MINF: - case CombiningKind::MAXF: - type = KindType::FLOAT; - break; - case CombiningKind::MINUI: - case CombiningKind::MINSI: - case CombiningKind::MAXUI: - case CombiningKind::MAXSI: - case CombiningKind::AND: - case CombiningKind::OR: - case CombiningKind::XOR: - type = KindType::INT; - break; - case CombiningKind::ADD: - case CombiningKind::MUL: - type = isInt ? KindType::INT : KindType::FLOAT; - break; - } - bool isValidIntKind = (type == KindType::INT) && isInt; - bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt); - return (isValidIntKind || isValidFloatKind); -} - -/// This function constructs the appropriate integer or float -/// operation given the vector combining kind and operands. The -/// supported int operations are : add, mul, min (signed/unsigned), -/// max(signed/unsigned), and, or, xor. The supported float -/// operations are : add, mul, min and max. 
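For instance, under the mapping implemented below, an ADD combining kind on integer elements becomes `arith.addi` and on float elements `arith.addf`, while MINF maps to `arith.minf`; the operand names and vector shapes here are placeholders:

```mlir
%0 = arith.addi %xi, %yi : vector<2x1xi32>   // CombiningKind::ADD, integer elements
%1 = arith.addf %xf, %yf : vector<2x1xf32>   // CombiningKind::ADD, float elements
%2 = arith.minf %xf, %yf : vector<2x1xf32>   // CombiningKind::MINF
```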
-static Value genOperator(Location loc, Value x, Value y, - vector::CombiningKind kind, - PatternRewriter &rewriter) { - using vector::CombiningKind; - - auto elType = x.getType().cast().getElementType(); - bool isInt = elType.isIntOrIndex(); - - Value combinedResult{nullptr}; - switch (kind) { - case CombiningKind::ADD: - if (isInt) - combinedResult = rewriter.create(loc, x, y); - else - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MUL: - if (isInt) - combinedResult = rewriter.create(loc, x, y); - else - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MINUI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MINSI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MAXUI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MAXSI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::AND: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::OR: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::XOR: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MINF: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MAXF: - combinedResult = rewriter.create(loc, x, y); - break; - } - return combinedResult; -} - -/// Convert vector.scan op into arith ops and -/// vector.insert_strided_slice/extract_strided_slice -/// -/// Ex: -/// ``` -/// %0:2 = vector.scan , %arg0, %arg1 {inclusive = true, reduction_dim = -/// 1} : -/// (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>) -/// ``` -/// Gets converted to: -/// ``` -/// %cst = arith.constant dense<0> : vector<2x3xi32> -/// %0 = vector.extract_strided_slice %arg0 {offsets = [0, 0], sizes = [2, 1], -/// strides = [1, 1]} : vector<2x3xi32> to vector<2x1xi32> %1 = -/// vector.insert_strided_slice %0, %cst {offsets = [0, 0], strides = [1, 1]} -/// : vector<2x1xi32> into vector<2x3xi32> %2 = vector.extract_strided_slice -/// %arg0 {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]} : -/// vector<2x3xi32> to vector<2x1xi32> %3 = arith.muli %0, %2 : -/// vector<2x1xi32> %4 = vector.insert_strided_slice %3, %1 {offsets = [0, 1], -/// strides = [1, 1]} : vector<2x1xi32> into vector<2x3xi32> %5 = -/// vector.extract_strided_slice %arg0 {offsets = [0, 2], sizes = [2, 1], -/// strides = [1, 1]} : vector<2x3xi32> to vector<2x1xi32> %6 = arith.muli %3, -/// %5 : vector<2x1xi32> %7 = vector.insert_strided_slice %6, %4 {offsets = -/// [0, 2], strides = [1, 1]} : vector<2x1xi32> into vector<2x3xi32> %8 = -/// vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32> return %7, %8 : -/// vector<2x3xi32>, vector<2xi32> -/// ``` -struct ScanToArithOps : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ScanOp scanOp, - PatternRewriter &rewriter) const override { - auto loc = scanOp.getLoc(); - VectorType destType = scanOp.getDestType(); - ArrayRef destShape = destType.getShape(); - auto elType = destType.getElementType(); - bool isInt = elType.isIntOrIndex(); - if (!isValidKind(isInt, scanOp.getKind())) - return failure(); - - VectorType resType = VectorType::get(destShape, elType); - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - int64_t reductionDim = scanOp.getReductionDim(); - bool inclusive = scanOp.getInclusive(); - int64_t destRank = destType.getRank(); - VectorType initialValueType = 
scanOp.getInitialValueType(); - int64_t initialValueRank = initialValueType.getRank(); - - SmallVector reductionShape(destShape.begin(), destShape.end()); - reductionShape[reductionDim] = 1; - VectorType reductionType = VectorType::get(reductionShape, elType); - SmallVector offsets(destRank, 0); - SmallVector strides(destRank, 1); - SmallVector sizes(destShape.begin(), destShape.end()); - sizes[reductionDim] = 1; - ArrayAttr scanSizes = rewriter.getI64ArrayAttr(sizes); - ArrayAttr scanStrides = rewriter.getI64ArrayAttr(strides); - - Value lastOutput, lastInput; - for (int i = 0; i < destShape[reductionDim]; i++) { - offsets[reductionDim] = i; - ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(offsets); - Value input = rewriter.create( - loc, reductionType, scanOp.getSource(), scanOffsets, scanSizes, - scanStrides); - Value output; - if (i == 0) { - if (inclusive) { - output = input; - } else { - if (initialValueRank == 0) { - // ShapeCastOp cannot handle 0-D vectors - output = rewriter.create( - loc, input.getType(), scanOp.getInitialValue()); - } else { - output = rewriter.create( - loc, input.getType(), scanOp.getInitialValue()); - } - } - } else { - Value y = inclusive ? input : lastInput; - output = genOperator(loc, lastOutput, y, scanOp.getKind(), rewriter); - assert(output != nullptr); - } - result = rewriter.create( - loc, output, result, offsets, strides); - lastOutput = output; - lastInput = input; - } - - Value reduction; - if (initialValueRank == 0) { - Value v = rewriter.create(loc, lastOutput, 0); - reduction = - rewriter.create(loc, initialValueType, v); - } else { - reduction = rewriter.create(loc, initialValueType, - lastOutput); - } - - rewriter.replaceOp(scanOp, {result, reduction}); - return success(); - } -}; - /// Canonicalization of a `vector.contraction %a, %b, %c` with row-major matmul /// semantics to a contraction suitable for MMT (matrix matrix multiplication /// with the RHS transposed) lowering. @@ -3157,132 +1125,6 @@ struct CanonicalizeContractMatmulToMMT final FilterConstraintType filter; }; -/// Flattens 2 or more dimensional `vector.gather` ops by unrolling the -/// outermost dimension. For example: -/// ``` -/// %g = vector.gather %base[%c0][%v], %mask, %pass_thru : -/// ... into vector<2x3xf32> -/// -/// ==> -/// -/// %0 = arith.constant dense<0.0> : vector<2x3xf32> -/// %g0 = vector.gather %base[%c0][%v0], %mask0, %pass_thru0 : ... -/// %1 = vector.insert %g0, %0 [0] : vector<3xf32> into vector<2x3xf32> -/// %g1 = vector.gather %base[%c0][%v1], %mask1, %pass_thru1 : ... -/// %g = vector.insert %g1, %1 [1] : vector<3xf32> into vector<2x3xf32> -/// ``` -/// -/// When applied exhaustively, this will produce a sequence of 1-d gather ops. 
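A fully typed variant of the gather-flattening example above, assuming a 1-D `memref<?xf32>` base and the 2x3 shapes used there; only the first unrolled row is spelled out:

```mlir
%g = vector.gather %base[%c0][%v], %mask, %pass_thru
       : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32>
         into vector<2x3xf32>

// unrolls along the outermost dimension into, roughly:
%cst = arith.constant dense<0.0> : vector<2x3xf32>
%v0  = vector.extract %v[0] : vector<2x3xi32>
%m0  = vector.extract %mask[0] : vector<2x3xi1>
%p0  = vector.extract %pass_thru[0] : vector<2x3xf32>
%g0  = vector.gather %base[%c0][%v0], %m0, %p0
         : memref<?xf32>, vector<3xi32>, vector<3xi1>, vector<3xf32> into vector<3xf32>
%r0  = vector.insert %g0, %cst [0] : vector<3xf32> into vector<2x3xf32>
// ...and likewise for row 1, after which only rank-1 gathers remain.
```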
-struct FlattenGather : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::GatherOp op, - PatternRewriter &rewriter) const override { - VectorType resultTy = op.getType(); - if (resultTy.getRank() < 2) - return rewriter.notifyMatchFailure(op, "already flat"); - - Location loc = op.getLoc(); - Value indexVec = op.getIndexVec(); - Value maskVec = op.getMask(); - Value passThruVec = op.getPassThru(); - - Value result = rewriter.create( - loc, resultTy, rewriter.getZeroAttr(resultTy)); - - Type subTy = VectorType::get(resultTy.getShape().drop_front(), - resultTy.getElementType()); - - for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) { - int64_t thisIdx[1] = {i}; - - Value indexSubVec = - rewriter.create(loc, indexVec, thisIdx); - Value maskSubVec = - rewriter.create(loc, maskVec, thisIdx); - Value passThruSubVec = - rewriter.create(loc, passThruVec, thisIdx); - Value subGather = rewriter.create( - loc, subTy, op.getBase(), op.getIndices(), indexSubVec, maskSubVec, - passThruSubVec); - result = - rewriter.create(loc, subGather, result, thisIdx); - } - - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// Turns 1-d `vector.gather` into a scalarized sequence of `vector.loads` or -/// `tensor.extract`s. To avoid out-of-bounds memory accesses, these -/// loads/extracts are made conditional using `scf.if` ops. -struct Gather1DToConditionalLoads : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::GatherOp op, - PatternRewriter &rewriter) const override { - VectorType resultTy = op.getType(); - if (resultTy.getRank() != 1) - return rewriter.notifyMatchFailure(op, "unsupported rank"); - - Location loc = op.getLoc(); - Type elemTy = resultTy.getElementType(); - // Vector type with a single element. Used to generate `vector.loads`. - VectorType elemVecTy = VectorType::get({1}, elemTy); - - Value condMask = op.getMask(); - Value base = op.getBase(); - Value indexVec = rewriter.createOrFold( - loc, op.getIndexVectorType().clone(rewriter.getIndexType()), - op.getIndexVec()); - auto baseOffsets = llvm::to_vector(op.getIndices()); - Value lastBaseOffset = baseOffsets.back(); - - Value result = op.getPassThru(); - - // Emit a conditional access for each vector element. - for (int64_t i = 0, e = resultTy.getNumElements(); i < e; ++i) { - int64_t thisIdx[1] = {i}; - Value condition = - rewriter.create(loc, condMask, thisIdx); - Value index = rewriter.create(loc, indexVec, thisIdx); - baseOffsets.back() = - rewriter.createOrFold(loc, lastBaseOffset, index); - - auto loadBuilder = [&](OpBuilder &b, Location loc) { - Value extracted; - if (isa(base.getType())) { - // `vector.load` does not support scalar result; emit a vector load - // and extract the single result instead. 
- Value load = - b.create(loc, elemVecTy, base, baseOffsets); - int64_t zeroIdx[1] = {0}; - extracted = b.create(loc, load, zeroIdx); - } else { - extracted = b.create(loc, base, baseOffsets); - } - - Value newResult = - b.create(loc, extracted, result, thisIdx); - b.create(loc, newResult); - }; - auto passThruBuilder = [result](OpBuilder &b, Location loc) { - b.create(loc, result); - }; - - result = - rewriter - .create(loc, condition, /*thenBuilder=*/loadBuilder, - /*elseBuilder=*/passThruBuilder) - .getResult(0); - } - - rewriter.replaceOp(op, result); - return success(); - } -}; - } // namespace void mlir::vector::populateVectorMaskMaterializationPatterns( @@ -3307,33 +1149,6 @@ void mlir::vector::populateBubbleVectorBitCastOpPatterns( benefit); } -void mlir::vector::populateVectorBroadcastLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add(patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorMaskOpLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add( - patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorShapeCastLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add( - patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorContractLoweringPatterns( - RewritePatternSet &patterns, VectorTransformsOptions options, - PatternBenefit benefit) { - patterns.add(patterns.getContext(), benefit); - patterns.add( - options, patterns.getContext(), benefit); -} - void mlir::vector::populateVectorContractCanonicalizeMatmulToMMT( RewritePatternSet &patterns, std::function constraint, @@ -3342,13 +1157,6 @@ void mlir::vector::populateVectorContractCanonicalizeMatmulToMMT( std::move(constraint)); } -void mlir::vector::populateVectorTransposeLoweringPatterns( - RewritePatternSet &patterns, VectorTransformsOptions options, - PatternBenefit benefit) { - patterns.add( - options, patterns.getContext(), benefit); -} - void mlir::vector::populateVectorReductionToContractPatterns( RewritePatternSet &patterns, PatternBenefit benefit) { patterns.add(patterns.getContext(), benefit); } -void mlir::vector::populateVectorTransferLoweringPatterns( - RewritePatternSet &patterns, std::optional maxTransferRank, - PatternBenefit benefit) { - patterns.add(patterns.getContext(), - maxTransferRank, benefit); - patterns - .add( - patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorScanLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add(patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorGatherLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add(patterns.getContext(), - benefit); -} - //===----------------------------------------------------------------------===// // TableGen'd enum attribute definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index 554452cb265fd..8564bacedd21c 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -1290,7 +1290,7 @@ void SimpleAffineExprFlattener::addLocalVariableSemiAffine( // A floordiv is thus flattened by introducing a new local variable q, and // replacing that expression with 'q' while adding the constraints // c * q <= expr <= c * q + c - 1 to localVarCst (done by -// FlatAffineConstraints::addLocalFloorDiv). +// IntegerRelation::addLocalFloorDiv). 
// // A ceildiv is similarly flattened: // t = expr ceildiv c <=> t = (expr + c - 1) floordiv c diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index c924d2bcde556..6c9034d446341 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -8,6 +8,7 @@ #include "mlir/IR/AffineMap.h" #include "AffineMapDetail.h" +#include "mlir/IR/AffineExpr.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/Support/LogicalResult.h" @@ -15,8 +16,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -467,6 +470,15 @@ AffineMap::replace(const DenseMap &map) const { return AffineMap::inferFromExprList(newResults).front(); } +AffineMap AffineMap::dropResults(const llvm::SmallBitVector &positions) const { + auto exprs = llvm::to_vector<4>(getResults()); + // TODO: this is a pretty terrible API .. is there anything better? + for (auto pos = positions.find_last(); pos != -1; + pos = positions.find_prev(pos)) + exprs.erase(exprs.begin() + pos); + return AffineMap::get(getNumDims(), getNumSymbols(), exprs, getContext()); +} + AffineMap AffineMap::compose(AffineMap map) const { assert(getNumDims() == map.getNumResults() && "Number of results mismatch"); // Prepare `map` by concatenating the symbols and rewriting its exprs. @@ -732,13 +744,18 @@ static AffineMap projectCommonImpl(AffineMap map, replacements.reserve(numDimOrSym); auto createNewDimOrSym = (isDim) ? getAffineDimExpr : getAffineSymbolExpr; - auto replaceDims = [](AffineExpr e, ArrayRef replacements) { + + using replace_fn_ty = + std::function)>; + replace_fn_ty replaceDims = [](AffineExpr e, + ArrayRef replacements) { return e.replaceDims(replacements); }; - auto replaceSymbols = [](AffineExpr e, ArrayRef replacements) { + replace_fn_ty replaceSymbols = [](AffineExpr e, + ArrayRef replacements) { return e.replaceSymbols(replacements); }; - auto replaceNewDimOrSym = (isDim) ? replaceDims : replaceSymbols; + replace_fn_ty replaceNewDimOrSym = (isDim) ? replaceDims : replaceSymbols; MLIRContext *context = map.getContext(); int64_t newNumDimOrSym = 0; @@ -808,6 +825,14 @@ llvm::SmallBitVector mlir::getUnusedSymbolsBitVector(ArrayRef maps) { return numSymbolsBitVector; } +AffineMap +mlir::expandDimsToRank(AffineMap map, int64_t rank, + const llvm::SmallBitVector &projectedDimensions) { + auto id = AffineMap::getMultiDimIdentityMap(rank, map.getContext()); + AffineMap proj = id.dropResults(projectedDimensions); + return map.compose(proj); +} + //===----------------------------------------------------------------------===// // MutableAffineMap. //===----------------------------------------------------------------------===// @@ -829,7 +854,7 @@ bool MutableAffineMap::isMultipleOf(unsigned idx, int64_t factor) const { if (results[idx].isMultipleOf(factor)) return true; - // TODO: use simplifyAffineExpr and FlatAffineConstraints to + // TODO: use simplifyAffineExpr and FlatAffineValueConstraints to // complete this (for a more powerful analysis). 
return false; } diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index dd3112516fc51..75448955f3123 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -1369,7 +1369,7 @@ void SSANameState::printValueID(Value value, bool printResultNo, void SSANameState::printOperationID(Operation *op, raw_ostream &stream) const { auto it = operationIDs.find(op); if (it == operationIDs.end()) { - stream << "<>"; + stream << "<>"; } else { stream << '%' << it->second; } diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index d3ac7dcc17554..707f28d6c3641 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -1036,6 +1036,12 @@ FailureOr ModuleImport::convertConstant(llvm::Constant *constant) { return builder.create(loc, type).getResult(); } + // Convert poison. + if (auto *poisonVal = dyn_cast(constant)) { + Type type = convertType(poisonVal->getType()); + return builder.create(loc, type).getResult(); + } + // Convert undef. if (auto *undefVal = dyn_cast(constant)) { Type type = convertType(undefVal->getType()); diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 133999eff1ec3..476131b262fb9 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -237,14 +237,9 @@ func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () { %19 = "tosa.sigmoid"(%0) : (tensor<1xf32>) -> tensor<1xf32> // CHECK: linalg.generic - // CHECK: arith.constant 0.000000e+00 - // CHECK: arith.constant 5.000000e-01 // CHECK: arith.constant -2.14748365E+9 // CHECK: arith.constant 2.14748365E+9 - // CHECK: arith.addf - // CHECK: arith.subf - // CHECK: arith.cmpf olt - // CHECK: select + // CHECK: math.roundeven // CHECK: arith.minf // CHECK: arith.maxf // CHECK: arith.fptosi diff --git a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir index 3b7820cec67a2..b53fc55fdac91 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir @@ -159,3 +159,11 @@ func.func @multi_symbols() { "test_affine_map"() { affine_map = affine_map<(d0, d1)[s0, s1] -> (d0 + 1 + s1, d1 - 1 - s0)> } : () -> () return } + +// ----- + +// CHECK-LABEL: @no_affine_maps +func.func @no_affine_maps() { + // CHECK: return + return +} diff --git a/mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir b/mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir new file mode 100644 index 0000000000000..0c95ab8284afa --- /dev/null +++ b/mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt --arith-emulate-wide-int="widest-int-supported=32" --canonicalize %s | FileCheck %s + +// Check that we can fold away the 'hi' part calculation when it is know to be zero. 
+// +// CHECK-LABEL: func @uitofp_i16_ext_f64 +// CHECK-SAME: ([[ARG:%.+]]: i16) -> f64 +// CHECK-NEXT: [[EXT:%.+]] = arith.extui [[ARG]] : i16 to i32 +// CHECK-NEXT: [[FP:%.+]] = arith.uitofp [[EXT]] : i32 to f64 +// CHECK-NEXT: return [[FP]] : f64 +func.func @uitofp_i16_ext_f64(%a : i16) -> f64 { + %ext = arith.extui %a : i16 to i64 + %r = arith.uitofp %ext : i64 to f64 + return %r : f64 +} diff --git a/mlir/test/Dialect/Arith/emulate-wide-int.mlir b/mlir/test/Dialect/Arith/emulate-wide-int.mlir index 80edc6f2ad001..9fb5478d7e94f 100644 --- a/mlir/test/Dialect/Arith/emulate-wide-int.mlir +++ b/mlir/test/Dialect/Arith/emulate-wide-int.mlir @@ -908,3 +908,102 @@ func.func @xori_vector_a_b(%a : vector<3xi64>, %b : vector<3xi64>) -> vector<3xi %x = arith.xori %a, %b : vector<3xi64> return %x : vector<3xi64> } + +// CHECK-LABEL: func @uitofp_i64_f64 +// CHECK-SAME: ([[ARG:%.+]]: vector<2xi32>) -> f64 +// CHECK-NEXT: [[LOW:%.+]] = vector.extract [[ARG]][0] : vector<2xi32> +// CHECK-NEXT: [[HI:%.+]] = vector.extract [[ARG]][1] : vector<2xi32> +// CHECK-NEXT: [[CST0:%.+]] = arith.constant 0 : i32 +// CHECK-NEXT: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI]], [[CST0]] : i32 +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW]] : i32 to f64 +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : i32 to f64 +// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x41F0000000000000 : f64 +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : f64 +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : f64 +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : f64 +// CHECK-NEXT: return [[SEL]] : f64 +func.func @uitofp_i64_f64(%a : i64) -> f64 { + %r = arith.uitofp %a : i64 to f64 + return %r : f64 +} + +// CHECK-LABEL: func @uitofp_i64_f64_vector +// CHECK-SAME: ([[ARG:%.+]]: vector<3x2xi32>) -> vector<3xf64> +// CHECK-NEXT: [[EXTLOW:%.+]] = vector.extract_strided_slice [[ARG]] {offsets = [0, 0], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32> +// CHECK-NEXT: [[EXTHI:%.+]] = vector.extract_strided_slice [[ARG]] {offsets = [0, 1], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32> +// CHECK-NEXT: [[LOW:%.+]] = vector.shape_cast [[EXTLOW]] : vector<3x1xi32> to vector<3xi32> +// CHECK-NEXT: [[HI:%.+]] = vector.shape_cast [[EXTHI]] : vector<3x1xi32> to vector<3xi32> +// CHECK-NEXT: [[CST0:%.+]] = arith.constant dense<0> : vector<3xi32> +// CHECK-NEXT: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI]], [[CST0]] : vector<3xi32> +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[POW:%.+]] = arith.constant dense<0x41F0000000000000> : vector<3xf64> +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : vector<3xf64> +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : vector<3xf64> +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : vector<3xi1>, vector<3xf64> +// CHECK-NEXT: return [[SEL]] : vector<3xf64> +func.func @uitofp_i64_f64_vector(%a : vector<3xi64>) -> vector<3xf64> { + %r = arith.uitofp %a : vector<3xi64> to vector<3xf64> + return %r : vector<3xf64> +} + +// CHECK-LABEL: func @uitofp_i64_f16 +// CHECK-SAME: ([[ARG:%.+]]: vector<2xi32>) -> f16 +// CHECK-NEXT: [[LOW:%.+]] = vector.extract [[ARG]][0] : vector<2xi32> +// CHECK-NEXT: [[HI:%.+]] = vector.extract [[ARG]][1] : vector<2xi32> +// CHECK-NEXT: [[CST0:%.+]] = arith.constant 0 : i32 +// CHECK-NEXT: [[HIEQ0:%.+]] = arith.cmpi 
eq, [[HI]], [[CST0]] : i32 +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW]] : i32 to f16 +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : i32 to f16 +// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x7C00 : f16 +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : f16 +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : f16 +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : f16 +// CHECK-NEXT: return [[SEL]] : f16 +func.func @uitofp_i64_f16(%a : i64) -> f16 { + %r = arith.uitofp %a : i64 to f16 + return %r : f16 +} + +// CHECK-LABEL: func @sitofp_i64_f64 +// CHECK-SAME: ([[ARG:%.+]]: vector<2xi32>) -> f64 +// CHECK: [[VONES:%.+]] = arith.constant dense<-1> : vector<2xi32> +// CHECK: [[ONES1:%.+]] = vector.extract [[VONES]][0] : vector<2xi32> +// CHECK-NEXT: [[ONES2:%.+]] = vector.extract [[VONES]][1] : vector<2xi32> +// CHECK: arith.xori {{%.+}}, [[ONES1]] : i32 +// CHECK-NEXT: arith.xori {{%.+}}, [[ONES2]] : i32 +// CHECK: [[CST0:%.+]] = arith.constant 0 : i32 +// CHECK: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI:%.+]], [[CST0]] : i32 +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW:%.+]] : i32 to f64 +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : i32 to f64 +// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x41F0000000000000 : f64 +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : f64 +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : f64 +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : f64 +// CHECK-NEXT: [[NEG:%.+]] = arith.negf [[SEL]] : f64 +// CHECK-NEXT: [[FINAL:%.+]] = arith.select %{{.+}}, [[NEG]], [[SEL]] : f64 +// CHECK-NEXT: return [[FINAL]] : f64 +func.func @sitofp_i64_f64(%a : i64) -> f64 { + %r = arith.sitofp %a : i64 to f64 + return %r : f64 +} + +// CHECK-LABEL: func @sitofp_i64_f64_vector +// CHECK-SAME: ([[ARG:%.+]]: vector<3x2xi32>) -> vector<3xf64> +// CHECK: [[VONES:%.+]] = arith.constant dense<-1> : vector<3x2xi32> +// CHECK: arith.xori +// CHECK-NEXT: arith.xori +// CHECK: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI:%.+]], [[CST0:%.+]] : vector<3xi32> +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW:%.+]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI:%.+]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[POW:%.+]] = arith.constant dense<0x41F0000000000000> : vector<3xf64> +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : vector<3xf64> +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : vector<3xf64> +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : vector<3xi1>, vector<3xf64> +// CHECK-NEXT: [[NEG:%.+]] = arith.negf [[SEL]] : vector<3xf64> +// CHECK-NEXT: [[FINAL:%.+]] = arith.select %{{.+}}, [[NEG]], [[SEL]] : vector<3xi1>, vector<3xf64> +// CHECK-NEXT: return [[FINAL]] : vector<3xf64> +func.func @sitofp_i64_f64_vector(%a : vector<3xi64>) -> vector<3xf64> { + %r = arith.sitofp %a : vector<3xi64> to vector<3xf64> + return %r : vector<3xf64> +} diff --git a/mlir/test/Dialect/LLVMIR/callgraph.mlir b/mlir/test/Dialect/LLVMIR/callgraph.mlir index edb5b35d126a5..ca1044b8288c4 100644 --- a/mlir/test/Dialect/LLVMIR/callgraph.mlir +++ b/mlir/test/Dialect/LLVMIR/callgraph.mlir @@ -58,33 +58,32 @@ module attributes {"test.name" = "Invoke call"} { // CHECK-DAG: -- Call-Edge : // CHECK: -- SCCs -- - llvm.mlir.global external constant @_ZTIi() : !llvm.ptr + llvm.mlir.global external constant @_ZTIi() : !llvm.ptr llvm.func @foo(%arg0: i32) -> !llvm.struct<(i32, f64, i32)> - llvm.func @bar(!llvm.ptr, 
!llvm.ptr, !llvm.ptr) + llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) llvm.func @__gxx_personality_v0(...) -> i32 llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } { %0 = llvm.mlir.constant(0 : i32) : i32 %1 = llvm.mlir.constant(3 : i32) : i32 %2 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> - %3 = llvm.mlir.null : !llvm.ptr> - %4 = llvm.mlir.null : !llvm.ptr - %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> - %6 = llvm.bitcast %5 : !llvm.ptr> to !llvm.ptr - %7 = llvm.mlir.constant(1 : i32) : i32 - %8 = llvm.alloca %7 x i8 : (i32) -> !llvm.ptr - %9 = llvm.invoke @foo(%7) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> + %3 = llvm.mlir.null : !llvm.ptr + %4 = llvm.mlir.null : !llvm.ptr + %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr + %6 = llvm.mlir.constant(1 : i32) : i32 + %7 = llvm.alloca %6 x i8 : (i32) -> !llvm.ptr + %8 = llvm.invoke @foo(%6) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> ^bb1: - %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr>) (catch %6 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> - %11 = llvm.intr.eh.typeid.for %6 : (!llvm.ptr) -> i32 - llvm.resume %10 : !llvm.struct<(ptr, i32)> + %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr) (catch %5 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> + %11 = llvm.intr.eh.typeid.for %5 : (!llvm.ptr) -> i32 + llvm.resume %10 : !llvm.struct<(ptr, i32)> ^bb2: - llvm.return %7 : i32 + llvm.return %6 : i32 ^bb3: - llvm.invoke @bar(%8, %6, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () + llvm.invoke @bar(%7, %5, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () ^bb4: llvm.return %0 : i32 diff --git a/mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir new file mode 100644 index 0000000000000..2ae9727482fc3 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir @@ -0,0 +1,86 @@ +// RUN: mlir-opt --pass-pipeline='builtin.module(llvm.func(canonicalize{test-convergence}))' %s -split-input-file | FileCheck %s + +// CHECK-LABEL: fold_bitcast +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// CHECK-LABEL: fold_bitcast2 +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + %d = llvm.bitcast %c : !llvm.ptr to !llvm.ptr + llvm.return %d : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_addrcast +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// CHECK-LABEL: fold_addrcast2 +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + %d = llvm.addrspacecast %c : !llvm.ptr to !llvm.ptr + llvm.return %d : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_gep +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { + %c0 = arith.constant 0 : i32 + %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) -> !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: 
fold_gep_canon +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr %[[a0]][2] +// CHECK-NEXT: llvm.return %[[RES]] +llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { + %c2 = arith.constant 2 : i32 + %c = llvm.getelementptr %x[%c2] : (!llvm.ptr, i32) -> !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: load_dce +// CHECK-NEXT: llvm.return +llvm.func @load_dce(%x : !llvm.ptr) { + %0 = llvm.load %x : !llvm.ptr + llvm.return +} + +llvm.mlir.global external @fp() : !llvm.ptr + +// CHECK-LABEL: addr_dce +// CHECK-NEXT: llvm.return +llvm.func @addr_dce(%x : !llvm.ptr) { + %0 = llvm.mlir.addressof @fp : !llvm.ptr> + llvm.return +} + +// CHECK-LABEL: alloca_dce +// CHECK-NEXT: llvm.return +llvm.func @alloca_dce() { + %c1_i64 = arith.constant 1 : i64 + %0 = llvm.alloca %c1_i64 x i32 : (i64) -> !llvm.ptr + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir index 7fa7684f5ace0..6b2cac14f2985 100644 --- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir +++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir @@ -37,8 +37,8 @@ llvm.func @no_fold_extractvalue(%arr: !llvm.array<4 x f32>) -> f32 { %3 = llvm.extractvalue %2[0, 0] : !llvm.array<4 x !llvm.array<4 x f32>> llvm.return %3 : f32 - } + // ----- // CHECK-LABEL: fold_unrelated_extractvalue @@ -56,18 +56,18 @@ llvm.func @fold_unrelated_extractvalue(%arr: !llvm.array<4 x f32>) -> f32 { // CHECK-LABEL: fold_bitcast // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr - llvm.return %c : !llvm.ptr +llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr } // CHECK-LABEL: fold_bitcast2 // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr - %d = llvm.bitcast %c : !llvm.ptr to !llvm.ptr - llvm.return %d : !llvm.ptr +llvm.func @fold_bitcast2(%x : i32) -> i32 { + %c = llvm.bitcast %x : i32 to f32 + %d = llvm.bitcast %c : f32 to i32 + llvm.return %d : i32 } // ----- @@ -75,18 +75,18 @@ llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { // CHECK-LABEL: fold_addrcast // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr - llvm.return %c : !llvm.ptr +llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr } // CHECK-LABEL: fold_addrcast2 // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr - %d = llvm.addrspacecast %c : !llvm.ptr to !llvm.ptr - llvm.return %d : !llvm.ptr +llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr<5> + %d = llvm.addrspacecast %c : !llvm.ptr<5> to !llvm.ptr + llvm.return %d : !llvm.ptr } // ----- @@ -94,10 +94,10 @@ llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { // CHECK-LABEL: fold_gep // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { +llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { %c0 = arith.constant 0 : i32 - %c = llvm.getelementptr %x[%c0] 
: (!llvm.ptr, i32) -> !llvm.ptr - llvm.return %c : !llvm.ptr + %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) -> !llvm.ptr, i8 + llvm.return %c : !llvm.ptr } // CHECK-LABEL: fold_gep_neg @@ -114,13 +114,12 @@ llvm.func @fold_gep_neg(%x : !llvm.ptr) -> !llvm.ptr { // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr %[[a0]][2] // CHECK-NEXT: llvm.return %[[RES]] -llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { +llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { %c2 = arith.constant 2 : i32 - %c = llvm.getelementptr %x[%c2] : (!llvm.ptr, i32) -> !llvm.ptr - llvm.return %c : !llvm.ptr + %c = llvm.getelementptr %x[%c2] : (!llvm.ptr, i32) -> !llvm.ptr, i8 + llvm.return %c : !llvm.ptr } - // ----- // Check that LLVM constants participate in cross-dialect constant folding. The @@ -142,17 +141,17 @@ llvm.func @llvm_constant() -> i32 { // CHECK-LABEL: load_dce // CHECK-NEXT: llvm.return -llvm.func @load_dce(%x : !llvm.ptr) { - %0 = llvm.load %x : !llvm.ptr +llvm.func @load_dce(%x : !llvm.ptr) { + %0 = llvm.load %x : !llvm.ptr -> i8 llvm.return } -llvm.mlir.global external @fp() : !llvm.ptr +llvm.mlir.global external @fp() : !llvm.ptr // CHECK-LABEL: addr_dce // CHECK-NEXT: llvm.return -llvm.func @addr_dce(%x : !llvm.ptr) { - %0 = llvm.mlir.addressof @fp : !llvm.ptr> +llvm.func @addr_dce(%x : !llvm.ptr) { + %0 = llvm.mlir.addressof @fp : !llvm.ptr llvm.return } @@ -160,6 +159,6 @@ llvm.func @addr_dce(%x : !llvm.ptr) { // CHECK-NEXT: llvm.return llvm.func @alloca_dce() { %c1_i64 = arith.constant 1 : i64 - %0 = llvm.alloca %c1_i64 x i32 : (i64) -> !llvm.ptr + %0 = llvm.alloca %c1_i64 x i32 : (i64) -> !llvm.ptr llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir index 7aaef0d31bb9d..f7517b2f23108 100644 --- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir @@ -134,10 +134,10 @@ llvm.func @addr(%arg: i64) { // CHECK: %[[ALLOC:.*]] = llvm.alloca %allocCount = llvm.mlir.constant(1 : i32) : i32 - %alloc = llvm.alloca %allocCount x i64 : (i32) -> !llvm.ptr + %alloc = llvm.alloca %allocCount x i64 : (i32) -> !llvm.ptr // CHECK: llvm.intr.dbg.declare #[[VAR0]] = %[[ALLOC]] - llvm.intr.dbg.declare #var0 = %alloc : !llvm.ptr + llvm.intr.dbg.declare #var0 = %alloc : !llvm.ptr llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir new file mode 100644 index 0000000000000..9e14b1db3432b --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir @@ -0,0 +1,12 @@ +// RUN: mlir-opt %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>>} { + // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr>, %[[ARG1:.+]]: i32) + llvm.func @foo(%arg0: !llvm.ptr, array<4 x i32>)>>, %arg1: i32) { + // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32) + %0 = llvm.mlir.constant(0 : i32) : i32 + // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]] + %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {rawConstantIndices = array} : (!llvm.ptr, array<4 x i32>)>>, i32, i32) -> !llvm.ptr + llvm.return + } +} diff --git a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir 
b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir index 9e14b1db3432b..f5808134ea026 100644 --- a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir +++ b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir @@ -1,12 +1,12 @@ // RUN: mlir-opt %s | FileCheck %s module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>>} { - // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr>, %[[ARG1:.+]]: i32) - llvm.func @foo(%arg0: !llvm.ptr, array<4 x i32>)>>, %arg1: i32) { + // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr, %[[ARG1:.+]]: i32) + llvm.func @foo(%arg0: !llvm.ptr, %arg1: i32) { // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32) %0 = llvm.mlir.constant(0 : i32) : i32 // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]] - %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {rawConstantIndices = array} : (!llvm.ptr, array<4 x i32>)>>, i32, i32) -> !llvm.ptr + %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {elem_type = !llvm.struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>, rawConstantIndices = array} : (!llvm.ptr, i32, i32) -> !llvm.ptr llvm.return } } diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 5cc7d75b627fa..50f6c6a0e56f5 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -33,10 +33,10 @@ module { // GENERIC-SAME: () -> () }) {sym_name = "baz", function_type = !llvm.func} : () -> () - // CHECK: llvm.func @qux(!llvm.ptr {llvm.noalias}, i64) + // CHECK: llvm.func @qux(!llvm.ptr {llvm.noalias}, i64) // CHECK: attributes {xxx = {yyy = 42 : i64}} "llvm.func"() ({ - }) {sym_name = "qux", function_type = !llvm.func, i64)>, + }) {sym_name = "qux", function_type = !llvm.func, arg_attrs = [{llvm.noalias}, {}], xxx = {yyy = 42}} : () -> () // CHECK: llvm.func @roundtrip1() @@ -71,56 +71,56 @@ module { // CHECK: llvm.func @roundtrip8() -> i32 llvm.func @roundtrip8() -> i32 attributes {} - // CHECK: llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) - llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) + llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) - // CHECK: llvm.func @roundtrip10(!llvm.ptr {llvm.noalias}) - llvm.func @roundtrip10(%arg0: !llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @roundtrip10(!llvm.ptr {llvm.noalias}) + llvm.func @roundtrip10(%arg0: !llvm.ptr {llvm.noalias}) - // CHECK: llvm.func @roundtrip11(%{{.*}}: !llvm.ptr {llvm.noalias}) { - llvm.func @roundtrip11(%arg0: !llvm.ptr {llvm.noalias}) { + // CHECK: llvm.func @roundtrip11(%{{.*}}: !llvm.ptr {llvm.noalias}) { + llvm.func @roundtrip11(%arg0: !llvm.ptr {llvm.noalias}) { llvm.return } - // CHECK: llvm.func @roundtrip12(%{{.*}}: !llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @roundtrip12(%{{.*}}: !llvm.ptr {llvm.noalias}) // CHECK: attributes {foo = 42 : i32} - llvm.func @roundtrip12(%arg0: !llvm.ptr {llvm.noalias}) + llvm.func @roundtrip12(%arg0: !llvm.ptr {llvm.noalias}) attributes {foo = 42 : i32} { llvm.return } - // CHECK: llvm.func @byvalattr(%{{.*}}: !llvm.ptr {llvm.byval = i32}) - llvm.func @byvalattr(%arg0: !llvm.ptr {llvm.byval = i32}) { + // CHECK: llvm.func @byvalattr(%{{.*}}: !llvm.ptr {llvm.byval = i32}) + llvm.func @byvalattr(%arg0: !llvm.ptr {llvm.byval = i32}) { llvm.return } - // CHECK: llvm.func @sretattr(%{{.*}}: 
!llvm.ptr {llvm.sret = i32}) - // LOCINFO: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) - llvm.func @sretattr(%arg0: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) { + // CHECK: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32}) + // LOCINFO: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) + llvm.func @sretattr(%arg0: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) { llvm.return } - // CHECK: llvm.func @nestattr(%{{.*}}: !llvm.ptr {llvm.nest}) - llvm.func @nestattr(%arg0: !llvm.ptr {llvm.nest}) { + // CHECK: llvm.func @nestattr(%{{.*}}: !llvm.ptr {llvm.nest}) + llvm.func @nestattr(%arg0: !llvm.ptr {llvm.nest}) { llvm.return } - // CHECK: llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) - llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) - // CHECK: llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) - llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) - // CHECK: llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) - llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) - // CHECK: llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) - llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) - // CHECK: llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) - llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + // CHECK: llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + // CHECK: llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + // CHECK: llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + // CHECK: llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) // CHECK: llvm.func @noundefattr_decl(i32 {llvm.noundef}) llvm.func @noundefattr_decl(i32 {llvm.noundef}) - // CHECK: llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4 : i64}) - llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) - // CHECK: llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) - llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + // CHECK: llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4 : i64}) + llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) + // CHECK: llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) // CHECK: llvm.func @variadic(...) 
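The signature updates above follow the same typed-to-opaque pointer migration as the rest of these tests. A condensed sketch of the two spellings, with placeholder SSA names; in the opaque form the element type moves onto the operation where one is still required:

```mlir
// Typed-pointer form, now kept only in the *-typed-pointers.mlir variants:
%p = llvm.alloca %c1 x i32 : (i64) -> !llvm.ptr<i32>
%q = llvm.getelementptr %p[%idx] : (!llvm.ptr<i32>, i32) -> !llvm.ptr<i32>

// Opaque-pointer form used by the updated tests:
%p = llvm.alloca %c1 x i32 : (i64) -> !llvm.ptr
%q = llvm.getelementptr %p[%idx] : (!llvm.ptr, i32) -> !llvm.ptr, i8
```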
diff --git a/mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir new file mode 100644 index 0000000000000..56d720cc866b6 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir @@ -0,0 +1,46 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s + +// CHECK: llvm.mlir.global internal @global(42 : i64) {addr_space = 0 : i32} : i64 +llvm.mlir.global internal @global(42 : i64) : i64 + +// CHECK: llvm.mlir.global internal constant @".string"("foobar") +llvm.mlir.global internal constant @".string"("foobar") : !llvm.array<6 x i8> + +func.func @references() { + // CHECK: llvm.mlir.addressof @global : !llvm.ptr + %0 = llvm.mlir.addressof @global : !llvm.ptr + + // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr> + %1 = llvm.mlir.addressof @".string" : !llvm.ptr> + + llvm.return +} + +// ----- + +llvm.mlir.global internal @foo(0: i32) : i32 + +func.func @bar() { + // expected-error @+1 {{the type must be a pointer to the type of the referenced global}} + llvm.mlir.addressof @foo : !llvm.ptr + llvm.return +} + +// ----- + +llvm.func @foo() + +llvm.func @bar() { + // expected-error @+1 {{the type must be a pointer to the type of the referenced function}} + llvm.mlir.addressof @foo : !llvm.ptr + llvm.return +} + +// ----- + +llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 +func.func @mismatch_addr_space() { + // expected-error @+1 {{pointer address space must match address space of the referenced global}} + llvm.mlir.addressof @g : !llvm.ptr + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir index 2f0850834a0ef..aff116db5dcca 100644 --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -66,17 +66,14 @@ llvm.mlir.global external @has_addr_space(32 : i64) {addr_space = 3: i32} : i64 // CHECK-LABEL: references func.func @references() { - // CHECK: llvm.mlir.addressof @global : !llvm.ptr - %0 = llvm.mlir.addressof @global : !llvm.ptr - - // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr> - %1 = llvm.mlir.addressof @".string" : !llvm.ptr> + // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr + %0 = llvm.mlir.addressof @".string" : !llvm.ptr // CHECK: llvm.mlir.addressof @global : !llvm.ptr - %2 = llvm.mlir.addressof @global : !llvm.ptr + %1 = llvm.mlir.addressof @global : !llvm.ptr // CHECK: llvm.mlir.addressof @has_addr_space : !llvm.ptr<3> - %3 = llvm.mlir.addressof @has_addr_space : !llvm.ptr<3> + %2 = llvm.mlir.addressof @has_addr_space : !llvm.ptr<3> llvm.return } @@ -164,7 +161,7 @@ func.func @foo() { // The attribute parser will consume the first colon-type, so we put two of // them to trigger the attribute type mismatch error. 
// expected-error @+1 {{invalid kind of attribute specified}} - llvm.mlir.addressof "foo" : i64 : !llvm.ptr> + llvm.mlir.addressof "foo" : i64 : !llvm.ptr llvm.return } @@ -172,27 +169,7 @@ func.func @foo() { func.func @foo() { // expected-error @+1 {{must reference a global defined by 'llvm.mlir.global'}} - llvm.mlir.addressof @foo : !llvm.ptr> - llvm.return -} - -// ----- - -llvm.mlir.global internal @foo(0: i32) : i32 - -func.func @bar() { - // expected-error @+1 {{the type must be a pointer to the type of the referenced global}} - llvm.mlir.addressof @foo : !llvm.ptr - llvm.return -} - -// ----- - -llvm.func @foo() - -llvm.func @bar() { - // expected-error @+1 {{the type must be a pointer to the type of the referenced function}} - llvm.mlir.addressof @foo : !llvm.ptr + llvm.mlir.addressof @foo : !llvm.ptr llvm.return } @@ -224,23 +201,15 @@ llvm.mlir.global internal @g(43 : i64) : i64 { llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 func.func @mismatch_addr_space_implicit_global() { // expected-error @+1 {{pointer address space must match address space of the referenced global}} - llvm.mlir.addressof @g : !llvm.ptr + llvm.mlir.addressof @g : !llvm.ptr llvm.return } // ----- llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 -func.func @mismatch_addr_space() { - // expected-error @+1 {{pointer address space must match address space of the referenced global}} - llvm.mlir.addressof @g : !llvm.ptr - llvm.return -} -// ----- -llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 - -func.func @mismatch_addr_space_opaque() { +func.func @mismatch_addr_space() { // expected-error @+1 {{pointer address space must match address space of the referenced global}} llvm.mlir.addressof @g : !llvm.ptr<4> llvm.return diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir index cefb8d5e461d4..e6dc047fd42b9 100644 --- a/mlir/test/Dialect/LLVMIR/inlining.mlir +++ b/mlir/test/Dialect/LLVMIR/inlining.mlir @@ -160,12 +160,29 @@ llvm.func @caller() { // ----- -llvm.func @callee_noinline() attributes { passthrough = ["noinline"] } -llvm.func @callee_optnone() attributes { passthrough = ["optnone"] } -llvm.func @callee_noduplicate() attributes { passthrough = ["noduplicate"] } -llvm.func @callee_presplitcoroutine() attributes { passthrough = ["presplitcoroutine"] } -llvm.func @callee_returns_twice() attributes { passthrough = ["returns_twice"] } -llvm.func @callee_strictfp() attributes { passthrough = ["strictfp"] } +llvm.func @callee_noinline() attributes { passthrough = ["noinline"] } { + llvm.return +} + +llvm.func @callee_optnone() attributes { passthrough = ["optnone"] } { + llvm.return +} + +llvm.func @callee_noduplicate() attributes { passthrough = ["noduplicate"] } { + llvm.return +} + +llvm.func @callee_presplitcoroutine() attributes { passthrough = ["presplitcoroutine"] } { + llvm.return +} + +llvm.func @callee_returns_twice() attributes { passthrough = ["returns_twice"] } { + llvm.return +} + +llvm.func @callee_strictfp() attributes { passthrough = ["strictfp"] } { + llvm.return +} // CHECK-LABEL: llvm.func @caller // CHECK-NEXT: llvm.call @callee_noinline diff --git a/mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir new file mode 100644 index 0000000000000..033b84d04ef87 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir @@ -0,0 +1,283 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -verify-diagnostics + +func.func 
@alloca_ptr_type_attr_non_opaque_ptr(%sz : i64) { + // expected-error@below {{unexpected 'elem_type' attribute when non-opaque pointer type is used}} + "llvm.alloca"(%sz) { elem_type = i32 } : (i64) -> !llvm.ptr +} + +// ----- + +func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { + // expected-error@+1 {{2 operands present, but expected 0}} + llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) +} + +// ----- + +func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { + // expected-error@+1 {{op requires one result}} + llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () +} + +// ----- + +func.func @gep_non_function_type(%pos : i64, %base : !llvm.ptr) { + // expected-error@+1 {{invalid kind of type specified}} + llvm.getelementptr %base[%pos] : !llvm.ptr +} + +// ----- + +func.func @gep_too_few_dynamic(%base : !llvm.ptr) { + // expected-error@+1 {{expected as many dynamic indices as specified in 'rawConstantIndices'}} + %1 = "llvm.getelementptr"(%base) {rawConstantIndices = array} : (!llvm.ptr) -> !llvm.ptr +} + +// ----- + +func.func @call_variadic(%callee : !llvm.ptr>, %arg : i8) { + // expected-error@+1 {{indirect calls to variadic functions are not supported}} + llvm.call %callee(%arg) : !llvm.ptr>, (i8) -> (i8) + llvm.return +} + +// ----- + +func.func @indirect_callee_arg_mismatch(%arg0 : i32, %callee : !llvm.ptr>) { + // expected-error@+1 {{'llvm.call' op operand type mismatch for operand 0: 'i32' != 'i8'}} + "llvm.call"(%callee, %arg0) : (!llvm.ptr>, i32) -> () + llvm.return +} + +// ----- + +func.func @indirect_callee_return_mismatch(%callee : !llvm.ptr>) { + // expected-error@+1 {{'llvm.call' op result type mismatch: 'i32' != 'i8'}} + "llvm.call"(%callee) : (!llvm.ptr>) -> (i32) + llvm.return +} + +// ----- + +func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %i32 : i32) { + // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for operand #1}} + %0 = "llvm.atomicrmw"(%f32_ptr, %i32) {bin_op=11, ordering=1} : (!llvm.ptr, i32) -> i32 + llvm.return +} + +// ----- + +func.func @cmpxchg_expected_ptr(%f32 : f32) { + // expected-error@+1 {{op operand #0 must be LLVM pointer to integer or LLVM pointer type}} + %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (f32, f32, f32) -> !llvm.struct<(f32, i1)> + llvm.return +} + +// ----- + +func.func @cmpxchg_mismatched_operands(%i64_ptr : !llvm.ptr, %i32 : i32) { + // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for all other operands}} + %0 = "llvm.cmpxchg"(%i64_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i32) -> !llvm.struct<(i32, i1)> + llvm.return +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) -> i32 + +llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { personality = @__gxx_personality_v0} { + %0 = llvm.mlir.constant(3 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %1 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{clause #0 is not a known constant - null, addressof, bitcast}} + %3 = llvm.landingpad cleanup (catch %1 : i32) (catch %arg0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> + llvm.return %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) 
-> i32 + +llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0} { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.ptr : (i32) -> !llvm.ptr> + // expected-note@+1 {{global addresses expected as operand to bitcast used in clauses for landingpad}} + %2 = llvm.bitcast %1 : !llvm.ptr> to !llvm.ptr + %3 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{constant clauses expected}} + %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> + llvm.return %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) -> i32 + +llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0} { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{landingpad instruction expects at least one clause or cleanup attribute}} + %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> + llvm.return %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) -> i32 + +llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + // expected-error@+1 {{'llvm.resume' op expects landingpad value as operand}} + llvm.resume %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 + +llvm.func @caller(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{llvm.landingpad needs to be in a function with a personality}} + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.resume %2 : !llvm.struct<(ptr, i32)> +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<5>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + llvm.return +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr<3>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + llvm.return +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr<3>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<3>) -> 
!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + + llvm.return +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr<3>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + + llvm.return +} + +// ----- + +llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32, + %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, + %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { + // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} + nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 + {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : !llvm.ptr<5>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected source pointer in memory space 3}} + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1, 2 or 4}} + %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is i32}} + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32)> + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is a structure of 4 elements of type i32}} + %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + llvm.return +} + +// ----- + +func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { + // expected-error @below {{expected byte size to be either 4, 8 or 16.}} + nvvm.cp.async.shared.global %arg0, %arg1, 32 : !llvm.ptr, !llvm.ptr + return +} + +// ----- + +func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { + // expected-error @below {{bypass l1 is only support for 16 bytes copy.}} + nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} : !llvm.ptr, !llvm.ptr + return +} + +// ----- + +func.func @gep_struct_variable(%arg0: !llvm.ptr>, %arg1: i32, %arg2: i32) { + // expected-error @below {{op expected index 1 indexing a struct to be constant}} + llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr>, i32, i32) -> !llvm.ptr + return +} + +// ----- + +func.func @gep_out_of_bounds(%ptr: !llvm.ptr)>>, %idx: i64) { + // expected-error @below {{index 2 indexing a struct is out of bounds}} + llvm.getelementptr %ptr[%idx, 1, 3] : (!llvm.ptr)>>, i64) -> !llvm.ptr + return +} diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 3e019144a199b..c3af84e55b881 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -64,7 +64,7 @@ func.func @alloca_missing_input_result_type(%size : i64) { func.func @alloca_missing_input_type() { // 
expected-error@+1 {{expected trailing function type with one argument and one result}} - llvm.alloca %size x i32 : () -> (!llvm.ptr) + llvm.alloca %size x i32 : () -> (!llvm.ptr) } // ----- @@ -78,14 +78,14 @@ func.func @alloca_missing_result_type() { func.func @alloca_non_function_type() { // expected-error@+1 {{expected trailing function type with one argument and one result}} - llvm.alloca %size x i32 : !llvm.ptr + llvm.alloca %size x i32 : !llvm.ptr } // ----- func.func @alloca_non_integer_alignment() { // expected-error@+1 {{expected integer alignment}} - llvm.alloca %size x i32 {alignment = 3.0} : !llvm.ptr + llvm.alloca %size x i32 {alignment = 3.0} : !llvm.ptr } // ----- @@ -97,44 +97,37 @@ func.func @alloca_opaque_ptr_no_type(%sz : i64) { // ----- -func.func @alloca_ptr_type_attr_non_opaque_ptr(%sz : i64) { - // expected-error@below {{unexpected 'elem_type' attribute when non-opaque pointer type is used}} - "llvm.alloca"(%sz) { elem_type = i32 } : (i64) -> !llvm.ptr -} - -// ----- - -func.func @gep_missing_input_result_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_missing_input_result_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{2 operands present, but expected 0}} llvm.getelementptr %base[%pos] : () -> () } // ----- -func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{2 operands present, but expected 0}} - llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) + llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) } // ----- -func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{op requires one result}} - llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () + llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () } // ----- -func.func @gep_non_function_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_non_function_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{invalid kind of type specified}} - llvm.getelementptr %base[%pos] : !llvm.ptr + llvm.getelementptr %base[%pos] : !llvm.ptr } // ----- -func.func @gep_too_few_dynamic(%base : !llvm.ptr) { +func.func @gep_too_few_dynamic(%base : !llvm.ptr) { // expected-error@+1 {{expected as many dynamic indices as specified in 'rawConstantIndices'}} - %1 = "llvm.getelementptr"(%base) {rawConstantIndices = array} : (!llvm.ptr) -> !llvm.ptr + %1 = "llvm.getelementptr"(%base) {elem_type = f32, rawConstantIndices = array} : (!llvm.ptr) -> !llvm.ptr } // ----- @@ -302,14 +295,6 @@ func.func @call_unknown_symbol() { // ----- -func.func @call_variadic(%callee : !llvm.ptr>, %arg : i8) { - // expected-error@+1 {{indirect calls to variadic functions are not supported}} - llvm.call %callee(%arg) : !llvm.ptr>, (i8) -> (i8) - llvm.return -} - -// ----- - func.func private @standard_func_callee() func.func @call_non_llvm() { @@ -346,14 +331,6 @@ func.func @callee_arg_mismatch(%arg0 : i32) { // ----- -func.func @indirect_callee_arg_mismatch(%arg0 : i32, %callee : !llvm.ptr>) { - // expected-error@+1 {{'llvm.call' op operand type mismatch for operand 0: 'i32' != 'i8'}} - "llvm.call"(%callee, %arg0) : (!llvm.ptr>, i32) -> () - llvm.return -} - -// ----- - llvm.func @callee_func() -> (i8) func.func @callee_return_mismatch() { @@ -364,14 +341,6 @@ func.func @callee_return_mismatch() { // ----- -func.func @indirect_callee_return_mismatch(%callee : !llvm.ptr>) { - // expected-error@+1 {{'llvm.call' op result type 
mismatch: 'i32' != 'i8'}} - "llvm.call"(%callee) : (!llvm.ptr>) -> (i32) - llvm.return -} - -// ----- - func.func @call_too_many_results(%callee : !llvm.ptr) { // expected-error@+1 {{expected function with 0 or 1 result}} llvm.call %callee() : !llvm.ptr, () -> (i32, i32) @@ -406,14 +375,14 @@ llvm.func @func_result_mismatch(%arg0: f32) -> i32 { func.func @constant_wrong_type() { // expected-error@+1 {{only supports integer, float, string or elements attributes}} - llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr> + llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr } // ----- func.func @constant_wrong_type_string() { // expected-error@below {{expected array type of 3 i8 elements for the string constant}} - llvm.mlir.constant("foo") : !llvm.ptr + llvm.mlir.constant("foo") : !llvm.ptr } // ----- @@ -671,47 +640,39 @@ func.func @atomicrmw_expected_ptr(%f32 : f32) { // ----- -func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %i32 : i32) { - // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for operand #1}} - %0 = "llvm.atomicrmw"(%f32_ptr, %i32) {bin_op=11, ordering=1} : (!llvm.ptr, i32) -> i32 - llvm.return -} - -// ----- - -func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %f32 : f32) { +func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %f32 : f32) { // expected-error@+1 {{op failed to verify that result #0 and operand #1 have the same type}} - %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm.ptr, f32) -> i32 + %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm.ptr, f32) -> i32 llvm.return } // ----- -func.func @atomicrmw_expected_float(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @atomicrmw_expected_float(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{expected LLVM IR floating point type}} - %0 = llvm.atomicrmw fadd %i32_ptr, %i32 unordered : !llvm.ptr, i32 + %0 = llvm.atomicrmw fadd %i32_ptr, %i32 unordered : !llvm.ptr, i32 llvm.return } // ----- -func.func @atomicrmw_unexpected_xchg_type(%i1_ptr : !llvm.ptr, %i1 : i1) { +func.func @atomicrmw_unexpected_xchg_type(%i1_ptr : !llvm.ptr, %i1 : i1) { // expected-error@+1 {{unexpected LLVM IR type for 'xchg' bin_op}} - %0 = llvm.atomicrmw xchg %i1_ptr, %i1 unordered : !llvm.ptr, i1 + %0 = llvm.atomicrmw xchg %i1_ptr, %i1 unordered : !llvm.ptr, i1 llvm.return } // ----- -func.func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : f32) { +func.func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : f32) { // expected-error@+1 {{expected LLVM IR integer type}} - %0 = llvm.atomicrmw max %f32_ptr, %f32 unordered : !llvm.ptr, f32 + %0 = llvm.atomicrmw max %f32_ptr, %f32 unordered : !llvm.ptr, f32 llvm.return } // ----- -func.func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr, %f32 : f32) { +func.func @cmpxchg_expected_ptr(%f32 : f32) { // expected-error@+1 {{op operand #0 must be LLVM pointer to integer or LLVM pointer type}} %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (f32, f32, f32) -> !llvm.struct<(f32, i1)> llvm.return @@ -719,14 +680,6 @@ func.func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr, %f32 : f32) { // ----- -func.func @cmpxchg_mismatched_operands(%i64_ptr : !llvm.ptr, %i32 : i32) { - // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for all other operands}} - %0 = "llvm.cmpxchg"(%i64_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i32) -> !llvm.struct<(i32, i1)> - llvm.return -} - -// ----- - func.func 
@cmpxchg_mismatched_value_operands(%ptr : !llvm.ptr, %i32 : i32, %i64 : i64) { // expected-error@+1 {{op failed to verify that operand #1 and operand #2 have the same type}} %0 = "llvm.cmpxchg"(%ptr, %i32, %i64) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i64) -> !llvm.struct<(i32, i1)> @@ -743,41 +696,41 @@ func.func @cmpxchg_mismatched_result(%ptr : !llvm.ptr, %i64 : i64) { // ----- -func.func @cmpxchg_unexpected_type(%i1_ptr : !llvm.ptr, %i1 : i1) { +func.func @cmpxchg_unexpected_type(%i1_ptr : !llvm.ptr, %i1 : i1) { // expected-error@+1 {{unexpected LLVM IR type}} - %0 = llvm.cmpxchg %i1_ptr, %i1, %i1 monotonic monotonic : !llvm.ptr, i1 + %0 = llvm.cmpxchg %i1_ptr, %i1, %i1 monotonic monotonic : !llvm.ptr, i1 llvm.return } // ----- -func.func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{ordering must be at least 'monotonic'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 unordered monotonic : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 unordered monotonic : !llvm.ptr, i32 llvm.return } // ----- -func.func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{ordering must be at least 'monotonic'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 monotonic unordered : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 monotonic unordered : !llvm.ptr, i32 llvm.return } // ----- -func.func @cmpxchg_failure_release(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_failure_release(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{failure ordering cannot be 'release' or 'acq_rel'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel release : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel release : !llvm.ptr, i32 llvm.return } // ----- -func.func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{failure ordering cannot be 'release' or 'acq_rel'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel acq_rel : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel acq_rel : !llvm.ptr, i32 llvm.return } @@ -786,7 +739,7 @@ func.func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : i32) { llvm.func @foo(i32) -> i32 llvm.func @__gxx_personality_v0(...) -> i32 -llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { personality = @__gxx_personality_v0} { +llvm.func @bad_landingpad(%arg0: !llvm.ptr) -> i32 attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(3 : i32) : i32 %1 = llvm.mlir.constant(2 : i32) : i32 %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (i32) -> i32 @@ -794,7 +747,7 @@ llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { persona llvm.return %1 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{clause #0 is not a known constant - null, addressof, bitcast}} - %3 = llvm.landingpad cleanup (catch %1 : i32) (catch %arg0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> + %3 = llvm.landingpad cleanup (catch %1 : i32) (catch %arg0 : !llvm.ptr) : !llvm.struct<(ptr, i32)> llvm.return %0 : i32 } @@ -805,15 +758,15 @@ llvm.func @__gxx_personality_v0(...) 
-> i32 llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(1 : i32) : i32 - %1 = llvm.alloca %0 x !llvm.ptr : (i32) -> !llvm.ptr> + %1 = llvm.alloca %0 x !llvm.ptr : (i32) -> !llvm.ptr // expected-note@+1 {{global addresses expected as operand to bitcast used in clauses for landingpad}} - %2 = llvm.bitcast %1 : !llvm.ptr> to !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr %3 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 ^bb1: // pred: ^bb0 llvm.return %0 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{constant clauses expected}} - %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> + %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> llvm.return %0 : i32 } @@ -829,7 +782,7 @@ llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personali llvm.return %0 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{landingpad instruction expects at least one clause or cleanup attribute}} - %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> + %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> llvm.return %0 : i32 } @@ -844,7 +797,7 @@ llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personali ^bb1: // pred: ^bb0 llvm.return %0 : i32 ^bb2: // pred: ^bb0 - %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> // expected-error@+1 {{'llvm.resume' op expects landingpad value as operand}} llvm.resume %0 : i32 } @@ -860,8 +813,8 @@ llvm.func @caller(%arg0: i32) -> i32 { llvm.return %0 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{llvm.landingpad needs to be in a function with a personality}} - %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> - llvm.resume %2 : !llvm.struct<(ptr, i32)> + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.resume %2 : !llvm.struct<(ptr, i32)> } // ----- @@ -1056,55 +1009,55 @@ module { // ----- -llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<5>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr<3>, %arg1: i32) { // 
expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32, +llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> + : !llvm.ptr<5>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -1208,33 +1161,33 @@ llvm.func @gpu_wmma_mma_op_invalid_result(%arg0: vector<2 x f16>, %arg1: vector< // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { // expected-error@+1 {{'nvvm.ldmatrix' op expected source pointer in memory space 3}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 llvm.return } // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1, 2 or 4}} - %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> i32 llvm.return } // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is i32}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32)> + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32)> llvm.return } // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is a structure of 4 elements of type i32}} - %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32, 
i32)> llvm.return } @@ -1278,33 +1231,33 @@ func.func @bitcast(%arg0: vector<2x3xf32>) { // ----- -func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +func.func @cp_async(%arg0: !llvm.ptr<3>, %arg1: !llvm.ptr<1>) { // expected-error @below {{expected byte size to be either 4, 8 or 16.}} - nvvm.cp.async.shared.global %arg0, %arg1, 32 : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 32 : !llvm.ptr<3>, !llvm.ptr<1> return } // ----- -func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +func.func @cp_async(%arg0: !llvm.ptr<3>, %arg1: !llvm.ptr<1>) { // expected-error @below {{bypass l1 is only support for 16 bytes copy.}} - nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} : !llvm.ptr<3>, !llvm.ptr<1> return } // ----- -func.func @gep_struct_variable(%arg0: !llvm.ptr>, %arg1: i32, %arg2: i32) { +func.func @gep_struct_variable(%arg0: !llvm.ptr, %arg1: i32, %arg2: i32) { // expected-error @below {{op expected index 1 indexing a struct to be constant}} - llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr>, i32, i32) -> !llvm.ptr + llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr, i32, i32) -> !llvm.ptr, !llvm.struct<(i32)> return } // ----- -func.func @gep_out_of_bounds(%ptr: !llvm.ptr)>>, %idx: i64) { +func.func @gep_out_of_bounds(%ptr: !llvm.ptr, %idx: i64) { // expected-error @below {{index 2 indexing a struct is out of bounds}} - llvm.getelementptr %ptr[%idx, 1, 3] : (!llvm.ptr)>>, i64) -> !llvm.ptr + llvm.getelementptr %ptr[%idx, 1, 3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(i32, struct<(i32, f32)>)> return } @@ -1321,8 +1274,8 @@ func.func @non_splat_shuffle_on_scalable_vector(%arg0: vector<[4]xf32>) { llvm.mlir.global internal @side_effecting_global() : !llvm.struct<(i8)> { %0 = llvm.mlir.constant(1 : i64) : i64 // expected-error@below {{ops with side effects not allowed in global initializers}} - %1 = llvm.alloca %0 x !llvm.struct<(i8)> : (i64) -> !llvm.ptr> - %2 = llvm.load %1 : !llvm.ptr> + %1 = llvm.alloca %0 x !llvm.struct<(i8)> : (i64) -> !llvm.ptr + %2 = llvm.load %1 : !llvm.ptr -> !llvm.struct<(i8)> llvm.return %2 : !llvm.struct<(i8)> } diff --git a/mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir new file mode 100644 index 0000000000000..5cf1ed03e64c8 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir @@ -0,0 +1,145 @@ +// RUN: mlir-opt --test-data-layout-query --split-input-file --verify-diagnostics %s | FileCheck %s + +module { + // CHECK: @no_spec + func.func @no_spec() { + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr> + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: 
alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr<5> + return + } +} + +// ----- + +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[32, 32, 64]> : vector<3xi32>>, + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32> +>} { + // CHECK: @spec + func.func @spec() { + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr> + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr<3> + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr<4> + return + } +} + +// ----- + +// expected-error@below {{unexpected layout attribute for pointer to 'i32'}} +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>> +>} { + func.func @pointer() { + return + } +} + +// ----- + +// expected-error@below {{expected layout attribute for '!llvm.ptr' to be a dense integer elements attribute with 3 or 4 elements}} +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[64.0, 64.0, 64.0]> : vector<3xf32>> +>} { + func.func @pointer() { + return + } +} + +// ----- + +// expected-error@below {{preferred alignment is expected to be at least as large as ABI alignment}} +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[64, 64, 32]> : vector<3xi32>> +>} { + func.func @pointer() { + return + } +} diff --git a/mlir/test/Dialect/LLVMIR/layout.mlir b/mlir/test/Dialect/LLVMIR/layout.mlir index e5c8c0bd86db1..d6e2013cc86ca 100644 --- a/mlir/test/Dialect/LLVMIR/layout.mlir +++ b/mlir/test/Dialect/LLVMIR/layout.mlir @@ -3,42 +3,13 @@ module { // CHECK: @no_spec func.func @no_spec() { + "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: 
alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr> - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr<3> // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 @@ -52,8 +23,8 @@ module { // ----- module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[32, 32, 64]> : vector<3xi32>>, - #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry : vector<3xi32>>, + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi32>>, #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32> >} { @@ -64,37 +35,19 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 4 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 4 - // CHECK: alloca_memory_space = 5 - // CHECK: bitsize = 32 - // CHECK: preferred = 8 - // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 4 - // CHECK: alloca_memory_space = 5 - // CHECK: bitsize = 32 - // CHECK: preferred = 8 - // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr> - // CHECK: alignment = 4 - // CHECK: alloca_memory_space = 5 - // CHECK: bitsize = 32 - // CHECK: preferred = 8 - // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr<3> // CHECK: alignment = 8 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr<5> // CHECK: alignment = 4 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 @@ -113,20 +66,9 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // ----- -// expected-error@below {{unexpected layout attribute for pointer to 'i32'}} -module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>> ->} { - func.func @pointer() { - return - } -} - -// ----- - -// expected-error@below {{expected layout attribute for '!llvm.ptr' to be a dense integer elements attribute with 3 or 4 elements}} +// expected-error@below {{expected layout attribute for '!llvm.ptr' to be a dense integer elements attribute with 3 or 4 elements}} module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[64.0, 64.0, 64.0]> : vector<3xf32>> + #dlti.dl_entry : vector<3xf32>> >} { func.func @pointer() { return @@ -137,7 +79,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // 
expected-error@below {{preferred alignment is expected to be at least as large as ABI alignment}} module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[64, 64, 32]> : vector<3xi32>> + #dlti.dl_entry : vector<3xi32>> >} { func.func @pointer() { return diff --git a/mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir new file mode 100644 index 0000000000000..5fbadd1dc414e --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir @@ -0,0 +1,55 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @nvvm_wmma_load_tf32 +func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { + // CHECK: nvvm.wmma.load {{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> +} + +// CHECK-LABEL: @cp_async +llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +// CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 + nvvm.cp.async.shared.global %arg0, %arg1, 16 : !llvm.ptr, !llvm.ptr +// CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 {bypass_l1} + nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} : !llvm.ptr, !llvm.ptr +// CHECK: nvvm.cp.async.commit.group + nvvm.cp.async.commit.group +// CHECK: nvvm.cp.async.wait.group 0 + nvvm.cp.async.wait.group 0 + llvm.return +} + +// CHECK-LABEL: llvm.func @ld_matrix +llvm.func @ld_matrix(%arg0: !llvm.ptr) { + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 1 : i32} : (!llvm.ptr) -> i32 + %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 2 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 4 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + llvm.return +} + +// CHECK-LABEL: llvm.func @redux_sync +llvm.func @redux_sync(%value : i32, %offset : i32) -> i32 { + // CHECK: nvvm.redux.sync add %{{.*}} + %r1 = nvvm.redux.sync add %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync max %{{.*}} + %r2 = nvvm.redux.sync max %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync min %{{.*}} + %r3 = nvvm.redux.sync min %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync umax %{{.*}} + %r5 = nvvm.redux.sync umax %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync umin %{{.*}} + %r6 = nvvm.redux.sync umin %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync and %{{.*}} + %r7 = nvvm.redux.sync and %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync or %{{.*}} + %r8 = nvvm.redux.sync or %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync xor %{{.*}} + %r9 = nvvm.redux.sync xor %value, %offset : i32 -> i32 + llvm.return %r1 : i32 +} diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 6596b8503d7a5..c7c83d29638c4 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ 
b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -266,11 +266,11 @@ func.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, } // CHECK-LABEL: @nvvm_wmma_load_tf32 -func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { +func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { // CHECK: nvvm.wmma.load {{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> } @@ -288,11 +288,11 @@ func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : } // CHECK-LABEL: @cp_async -llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +llvm.func @cp_async(%arg0: !llvm.ptr<3>, %arg1: !llvm.ptr<1>) { // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 - nvvm.cp.async.shared.global %arg0, %arg1, 16 : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 16 : !llvm.ptr<3>, !llvm.ptr<1> // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 {bypass_l1} - nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} : !llvm.ptr<3>, !llvm.ptr<1> // CHECK: nvvm.cp.async.commit.group nvvm.cp.async.commit.group // CHECK: nvvm.cp.async.wait.group 0 @@ -301,18 +301,18 @@ llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { } // CHECK-LABEL: llvm.func @ld_matrix -llvm.func @ld_matrix(%arg0: !llvm.ptr) { - // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 1 : i32} : (!llvm.ptr) -> i32 - %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 - // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 2 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> - %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> - // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 4 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> - %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> +llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 1 : i32} : (!llvm.ptr<3>) -> i32 + %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> i32 + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 2 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 4 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return } // CHECK-LABEL: llvm.func @redux_sync -llvm.func @redux_sync(%value : i32, %offset : i32) -> i32 { +llvm.func @redux_sync(%value : i32, %offset : i32) -> i32 { // CHECK: nvvm.redux.sync add %{{.*}} %r1 = nvvm.redux.sync add %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync max %{{.*}} @@ -324,9 +324,9 @@ llvm.func @redux_sync(%value : 
i32, %offset : i32) -> i32 { // CHECK: nvvm.redux.sync umin %{{.*}} %r6 = nvvm.redux.sync umin %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync and %{{.*}} - %r7 = nvvm.redux.sync and %value, %offset : i32 -> i32 + %r7 = nvvm.redux.sync and %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync or %{{.*}} - %r8 = nvvm.redux.sync or %value, %offset : i32 -> i32 + %r8 = nvvm.redux.sync or %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync xor %{{.*}} %r9 = nvvm.redux.sync xor %value, %offset : i32 -> i32 llvm.return %r1 : i32 diff --git a/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir new file mode 100644 index 0000000000000..65411ff41e285 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir @@ -0,0 +1,6 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// Argument attributes + +// expected-error@below {{"llvm.sret" attribute attached to LLVM pointer argument of different type}} +llvm.func @invalid_sret_attr_type(%0 : !llvm.ptr {llvm.sret = !llvm.struct<(i32)>}) diff --git a/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir index 72bf45052ef13..d7ee6097b3600 100644 --- a/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir @@ -47,11 +47,6 @@ llvm.func @invalid_sret_arg_type(%0 : i32 {llvm.sret = !llvm.struct<(i32)>}) // ----- -// expected-error@below {{"llvm.sret" attribute attached to LLVM pointer argument of different type}} -llvm.func @invalid_sret_attr_type(%0 : !llvm.ptr {llvm.sret = !llvm.struct<(i32)>}) - -// ----- - // expected-error@below {{"llvm.byval" attribute attached to non-pointer LLVM type}} llvm.func @invalid_byval_arg_type(%0 : i32 {llvm.byval = !llvm.struct<(i32)>}) diff --git a/mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir new file mode 100644 index 0000000000000..7cc5a6deee541 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir @@ -0,0 +1,73 @@ +// RUN: mlir-opt %s | mlir-opt | FileCheck %s + +// CHECK-LABEL: func @ops +// CHECK-SAME: %[[I32:.*]]: i32 +func.func @ops(%arg0: i32) { +// Memory-related operations. 
+// +// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x f64 : (i32) -> !llvm.ptr +// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, i32, i32) -> !llvm.ptr +// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr +// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : !llvm.ptr +// CHECK-NEXT: %{{.*}} = llvm.bitcast %[[ALLOCA]] : !llvm.ptr to !llvm.ptr + %13 = llvm.alloca %arg0 x f64 : (i32) -> !llvm.ptr + %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, i32, i32) -> !llvm.ptr + %15 = llvm.load %14 : !llvm.ptr + llvm.store %15, %13 : !llvm.ptr + %16 = llvm.bitcast %13 : !llvm.ptr to !llvm.ptr + llvm.return +} + +// CHECK-LABEL: @gep +llvm.func @gep(%ptr: !llvm.ptr)>>, %idx: i64, + %ptr2: !llvm.ptr)>>) { + // CHECK: llvm.getelementptr %{{.*}}[%{{.*}}, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr + llvm.getelementptr %ptr[%idx, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr + // CHECK: llvm.getelementptr inbounds %{{.*}}[%{{.*}}, 0, %{{.*}}] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr + llvm.getelementptr inbounds %ptr2[%idx, 0, %idx] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr + llvm.return +} + +// CHECK-LABEL: @alloca +func.func @alloca(%size : i64) { + // CHECK: llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr + llvm.alloca %size x i32 {alignment = 0} : (i64) -> (!llvm.ptr) + // CHECK: llvm.alloca inalloca %{{.*}} x i32 {alignment = 8 : i64} : (i64) -> !llvm.ptr + llvm.alloca inalloca %size x i32 {alignment = 8} : (i64) -> (!llvm.ptr) + llvm.return +} + +// CHECK-LABEL: @null +func.func @null() { + // CHECK: llvm.mlir.null : !llvm.ptr + %0 = llvm.mlir.null : !llvm.ptr + // CHECK: llvm.mlir.null : !llvm.ptr>)>>, i64)>> + %1 = llvm.mlir.null : !llvm.ptr>)>>, i64)>> + llvm.return +} + +// CHECK-LABEL: llvm.func @vararg_func +llvm.func @vararg_func(%arg0: i32, ...) 
{ + // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA0:.+]] = llvm.alloca %{{.*}} x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> + // CHECK: %[[CAST0:.+]] = llvm.bitcast %[[ALLOCA0]] : !llvm.ptr)>> to !llvm.ptr + %2 = llvm.alloca %1 x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> + %3 = llvm.bitcast %2 : !llvm.ptr)>> to !llvm.ptr + // CHECK: llvm.intr.vastart %[[CAST0]] + llvm.intr.vastart %3 : !llvm.ptr + // CHECK: %[[ALLOCA1:.+]] = llvm.alloca %{{.*}} x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> + // CHECK: %[[CAST1:.+]] = llvm.bitcast %[[ALLOCA1]] : !llvm.ptr> to !llvm.ptr + %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> + %5 = llvm.bitcast %4 : !llvm.ptr> to !llvm.ptr + // CHECK: llvm.intr.vacopy %[[CAST0]] to %[[CAST1]] + llvm.intr.vacopy %3 to %5 : !llvm.ptr, !llvm.ptr + // CHECK: llvm.intr.vaend %[[CAST1]] + // CHECK: llvm.intr.vaend %[[CAST0]] + llvm.intr.vaend %5 : !llvm.ptr + llvm.intr.vaend %3 : !llvm.ptr + // CHECK: llvm.return + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 9147027c9d4b2..b430c56fe7aa2 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -1,10 +1,10 @@ // RUN: mlir-opt %s | mlir-opt | FileCheck %s // CHECK-LABEL: func @ops -// CHECK-SAME: (%[[I32:.*]]: i32, %[[FLOAT:.*]]: f32, %[[I8PTR1:.*]]: !llvm.ptr, %[[I8PTR2:.*]]: !llvm.ptr, %[[BOOL:.*]]: i1, %[[VI8PTR1:.*]]: !llvm.vec<2 x ptr>) +// CHECK-SAME: (%[[I32:.*]]: i32, %[[FLOAT:.*]]: f32, %[[PTR1:.*]]: !llvm.ptr, %[[PTR2:.*]]: !llvm.ptr, %[[BOOL:.*]]: i1, %[[VPTR1:.*]]: !llvm.vec<2 x ptr>) func.func @ops(%arg0: i32, %arg1: f32, - %arg2: !llvm.ptr, %arg3: !llvm.ptr, - %arg4: i1, %arg5 : !llvm.vec<2x!llvm.ptr>) { + %arg2: !llvm.ptr, %arg3: !llvm.ptr, + %arg4: i1, %arg5 : !llvm.vec<2x!llvm.ptr>) { // Integer arithmetic binary operations. 
// // CHECK: {{.*}} = llvm.add %[[I32]], %[[I32]] : i32 @@ -16,9 +16,9 @@ func.func @ops(%arg0: i32, %arg1: f32, // CHECK: {{.*}} = llvm.srem %[[I32]], %[[I32]] : i32 // CHECK: %[[SCALAR_PRED0:.+]] = llvm.icmp "ne" %[[I32]], %[[I32]] : i32 // CHECK: {{.*}} = llvm.add %[[SCALAR_PRED0]], %[[SCALAR_PRED0]] : i1 -// CHECK: %[[SCALAR_PRED1:.+]] = llvm.icmp "ne" %[[I8PTR1]], %[[I8PTR1]] : !llvm.ptr +// CHECK: %[[SCALAR_PRED1:.+]] = llvm.icmp "ne" %[[PTR1]], %[[PTR1]] : !llvm.ptr // CHECK: {{.*}} = llvm.add %[[SCALAR_PRED1]], %[[SCALAR_PRED1]] : i1 -// CHECK: %[[VEC_PRED:.+]] = llvm.icmp "ne" %[[VI8PTR1]], %[[VI8PTR1]] : !llvm.vec<2 x ptr> +// CHECK: %[[VEC_PRED:.+]] = llvm.icmp "ne" %[[VPTR1]], %[[VPTR1]] : !llvm.vec<2 x ptr> // CHECK: {{.*}} = llvm.add %[[VEC_PRED]], %[[VEC_PRED]] : vector<2xi1> %0 = llvm.add %arg0, %arg0 : i32 %1 = llvm.sub %arg0, %arg0 : i32 @@ -29,9 +29,9 @@ func.func @ops(%arg0: i32, %arg1: f32, %6 = llvm.srem %arg0, %arg0 : i32 %7 = llvm.icmp "ne" %arg0, %arg0 : i32 %typecheck_7 = llvm.add %7, %7 : i1 - %ptrcmp = llvm.icmp "ne" %arg2, %arg2 : !llvm.ptr + %ptrcmp = llvm.icmp "ne" %arg2, %arg2 : !llvm.ptr %typecheck_ptrcmp = llvm.add %ptrcmp, %ptrcmp : i1 - %vptrcmp = llvm.icmp "ne" %arg5, %arg5 : !llvm.vec<2 x ptr> + %vptrcmp = llvm.icmp "ne" %arg5, %arg5 : !llvm.vec<2 x ptr> %typecheck_vptrcmp = llvm.add %vptrcmp, %vptrcmp : vector<2 x i1> // Floating point binary operations. @@ -49,16 +49,14 @@ func.func @ops(%arg0: i32, %arg1: f32, // Memory-related operations. // -// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x f64 : (i32) -> !llvm.ptr -// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, i32, i32) -> !llvm.ptr -// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr -// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : !llvm.ptr -// CHECK-NEXT: %{{.*}} = llvm.bitcast %[[ALLOCA]] : !llvm.ptr to !llvm.ptr - %13 = llvm.alloca %arg0 x f64 : (i32) -> !llvm.ptr - %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, i32, i32) -> !llvm.ptr - %15 = llvm.load %14 : !llvm.ptr - llvm.store %15, %13 : !llvm.ptr - %16 = llvm.bitcast %13 : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x f64 : (i32) -> !llvm.ptr +// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, i32, i32) -> !llvm.ptr, f64 +// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> f64 +// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : f64, !llvm.ptr + %13 = llvm.alloca %arg0 x f64 : (i32) -> !llvm.ptr + %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, i32, i32) -> !llvm.ptr, f64 + %15 = llvm.load %14 : !llvm.ptr -> f64 + llvm.store %15, %13 : f64, !llvm.ptr // Function call-related operations. // @@ -130,10 +128,10 @@ func.func @ops(%arg0: i32, %arg1: f32, // Integer to pointer and pointer to integer conversions. 
// -// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[I32]] : i32 to !llvm.ptr -// CHECK: %{{.*}} = llvm.ptrtoint %[[PTR]] : !llvm.ptr to i32 - %25 = llvm.inttoptr %arg0 : i32 to !llvm.ptr - %26 = llvm.ptrtoint %25 : !llvm.ptr to i32 +// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[I32]] : i32 to !llvm.ptr +// CHECK: %{{.*}} = llvm.ptrtoint %[[PTR]] : !llvm.ptr to i32 + %25 = llvm.inttoptr %arg0 : i32 to !llvm.ptr + %26 = llvm.ptrtoint %25 : !llvm.ptr to i32 // Extended and Quad floating point // @@ -163,28 +161,27 @@ func.func @ops(%arg0: i32, %arg1: f32, // CHECK: llvm.intr.round(%[[FLOAT]]) : (f32) -> f32 %34 = llvm.intr.round(%arg1) : (f32) -> f32 -// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () +// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () -// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () +// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () // CHECK: %[[SZ:.*]] = llvm.mlir.constant %sz = llvm.mlir.constant(10: i64) : i64 -// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () - "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () +// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () // CHECK: llvm.return llvm.return } // CHECK-LABEL: @gep -llvm.func @gep(%ptr: !llvm.ptr)>>, %idx: i64, - %ptr2: !llvm.ptr)>>) { - // CHECK: llvm.getelementptr %{{.*}}[%{{.*}}, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr - llvm.getelementptr %ptr[%idx, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr - // CHECK: llvm.getelementptr inbounds %{{.*}}[%{{.*}}, 0, %{{.*}}] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr - llvm.getelementptr inbounds %ptr2[%idx, 0, %idx] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr +llvm.func @gep(%ptr: !llvm.ptr, %idx: i64, %ptr2: !llvm.ptr) { + // CHECK: llvm.getelementptr %{{.*}}[%{{.*}}, 1, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(i32, struct<(i32, f32)>)> + llvm.getelementptr %ptr[%idx, 1, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(i32, struct<(i32, f32)>)> + // CHECK: llvm.getelementptr inbounds %{{.*}}[%{{.*}}, 0, %{{.*}}] : (!llvm.ptr, i64, i64) -> !llvm.ptr, !llvm.struct<(array<10 x f32>)> + llvm.getelementptr inbounds %ptr2[%idx, 0, %idx] : (!llvm.ptr, i64, i64) -> !llvm.ptr, !llvm.struct<(array<10 x f32>)> llvm.return } @@ -248,9 +245,9 @@ llvm.func @foo(%arg0: i32) -> !llvm.struct<(i32, f64, i32)> { } // CHECK-LABEL: @casts -// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: vector<4xi32>, %[[V4I64:.*]]: vector<4xi64>, %[[I32PTR:.*]]: !llvm.ptr) +// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: vector<4xi32>, %[[V4I64:.*]]: vector<4xi64>, %[[PTR:.*]]: !llvm.ptr) func.func @casts(%arg0: i32, %arg1: i64, %arg2: vector<4xi32>, - %arg3: vector<4xi64>, %arg4: !llvm.ptr) { + %arg3: vector<4xi64>, %arg4: !llvm.ptr) { // CHECK: = llvm.sext %[[I32]] : 
i32 to i56 %0 = llvm.sext %arg0 : i32 to i56 // CHECK: = llvm.zext %[[I32]] : i32 to i64 @@ -271,21 +268,23 @@ func.func @casts(%arg0: i32, %arg1: i64, %arg2: vector<4xi32>, %8 = llvm.fptosi %7 : f32 to i32 // CHECK: = llvm.fptoui %[[FLOAT]] : f32 to i32 %9 = llvm.fptoui %7 : f32 to i32 -// CHECK: = llvm.addrspacecast %[[I32PTR]] : !llvm.ptr to !llvm.ptr - %10 = llvm.addrspacecast %arg4 : !llvm.ptr to !llvm.ptr +// CHECK: = llvm.addrspacecast %[[PTR]] : !llvm.ptr to !llvm.ptr<2> + %10 = llvm.addrspacecast %arg4 : !llvm.ptr to !llvm.ptr<2> +// CHECK: = llvm.bitcast %[[I64]] : i64 to f64 + %11 = llvm.bitcast %arg1 : i64 to f64 llvm.return } // CHECK-LABEL: @vect -func.func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32, %arg3: !llvm.vec<2 x ptr>) { +func.func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32, %arg3: !llvm.vec<2 x ptr>) { // CHECK: = llvm.extractelement {{.*}} : vector<4xf32> %0 = llvm.extractelement %arg0[%arg1 : i32] : vector<4xf32> // CHECK: = llvm.insertelement {{.*}} : vector<4xf32> %1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : vector<4xf32> // CHECK: = llvm.shufflevector {{.*}} [0, 0, 0, 0, 7] : vector<4xf32> %2 = llvm.shufflevector %arg0, %arg0 [0, 0, 0, 0, 7] : vector<4xf32> -// CHECK: = llvm.shufflevector %{{.+}}, %{{.+}} [1, 0] : !llvm.vec<2 x ptr> - %3 = llvm.shufflevector %arg3, %arg3 [1, 0] : !llvm.vec<2 x ptr> +// CHECK: = llvm.shufflevector %{{.+}}, %{{.+}} [1, 0] : !llvm.vec<2 x ptr> + %3 = llvm.shufflevector %arg3, %arg3 [1, 0] : !llvm.vec<2 x ptr> // CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> %4 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32> return @@ -323,19 +322,17 @@ func.func @mixed_vect(%arg0: vector<8xf32>, %arg1: vector<4xf32>, %arg2: vector< // CHECK-LABEL: @alloca func.func @alloca(%size : i64) { - // CHECK: llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr - llvm.alloca %size x i32 {alignment = 0} : (i64) -> (!llvm.ptr) - // CHECK: llvm.alloca inalloca %{{.*}} x i32 {alignment = 8 : i64} : (i64) -> !llvm.ptr - llvm.alloca inalloca %size x i32 {alignment = 8} : (i64) -> (!llvm.ptr) + // CHECK: llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr + llvm.alloca %size x i32 {alignment = 0} : (i64) -> (!llvm.ptr) + // CHECK: llvm.alloca inalloca %{{.*}} x i32 {alignment = 8 : i64} : (i64) -> !llvm.ptr + llvm.alloca inalloca %size x i32 {alignment = 8} : (i64) -> (!llvm.ptr) llvm.return } // CHECK-LABEL: @null func.func @null() { - // CHECK: llvm.mlir.null : !llvm.ptr - %0 = llvm.mlir.null : !llvm.ptr - // CHECK: llvm.mlir.null : !llvm.ptr>)>>, i64)>> - %1 = llvm.mlir.null : !llvm.ptr>)>>, i64)>> + // CHECK: llvm.mlir.null : !llvm.ptr + %0 = llvm.mlir.null : !llvm.ptr llvm.return } @@ -375,61 +372,57 @@ func.func @cmpxchg(%ptr : !llvm.ptr, %cmp : i32, %new : i32) { llvm.return } -llvm.mlir.global external constant @_ZTIi() : !llvm.ptr -llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) +llvm.mlir.global external constant @_ZTIi() : !llvm.ptr +llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) llvm.func @__gxx_personality_v0(...) 
-> i32 // CHECK-LABEL: @invokeLandingpad llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } { -// CHECK: %[[a0:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[V0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %{{.*}} = llvm.mlir.constant(3 : i32) : i32 -// CHECK: %[[a2:.*]] = llvm.mlir.constant("\01") : !llvm.array<1 x i8> -// CHECK: %[[a3:.*]] = llvm.mlir.null : !llvm.ptr> -// CHECK: %[[a4:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[a5:.*]] = llvm.mlir.addressof @_ZTIi : !llvm.ptr> -// CHECK: %[[a6:.*]] = llvm.bitcast %[[a5]] : !llvm.ptr> to !llvm.ptr -// CHECK: %[[a7:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: %[[a8:.*]] = llvm.alloca %[[a7]] x i8 : (i32) -> !llvm.ptr -// CHECK: %{{.*}} = llvm.invoke @foo(%[[a7]]) to ^[[BB2:.*]] unwind ^[[BB1:.*]] : (i32) -> !llvm.struct<(i32, f64, i32)> +// CHECK: %[[V1:.*]] = llvm.mlir.constant("\01") : !llvm.array<1 x i8> +// CHECK: %[[V2:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[V3:.*]] = llvm.mlir.addressof @_ZTIi : !llvm.ptr +// CHECK: %[[V4:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[V5:.*]] = llvm.alloca %[[V4]] x i8 : (i32) -> !llvm.ptr +// CHECK: %{{.*}} = llvm.invoke @foo(%[[V4]]) to ^[[BB2:.*]] unwind ^[[BB1:.*]] : (i32) -> !llvm.struct<(i32, f64, i32)> %0 = llvm.mlir.constant(0 : i32) : i32 %1 = llvm.mlir.constant(3 : i32) : i32 %2 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> - %3 = llvm.mlir.null : !llvm.ptr> - %4 = llvm.mlir.null : !llvm.ptr - %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> - %6 = llvm.bitcast %5 : !llvm.ptr> to !llvm.ptr - %7 = llvm.mlir.constant(1 : i32) : i32 - %8 = llvm.alloca %7 x i8 : (i32) -> !llvm.ptr - %9 = llvm.invoke @foo(%7) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> + %3 = llvm.mlir.null : !llvm.ptr + %4 = llvm.mlir.addressof @_ZTIi : !llvm.ptr + %5 = llvm.mlir.constant(1 : i32) : i32 + %6 = llvm.alloca %5 x i8 : (i32) -> !llvm.ptr + %7 = llvm.invoke @foo(%5) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> // CHECK: ^[[BB1]]: -// CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[a3]] : !llvm.ptr>) (catch %[[a6]] : !llvm.ptr) (filter %[[a2]] : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> -// CHECK: %{{.*}} = llvm.intr.eh.typeid.for %6 : (!llvm.ptr) -> i32 -// CHECK: llvm.resume %[[lp]] : !llvm.struct<(ptr, i32)> +// CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[V2]] : !llvm.ptr) (catch %[[V3]] : !llvm.ptr) (filter %[[V1]] : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> +// CHECK: %{{.*}} = llvm.intr.eh.typeid.for %[[V3]] : (!llvm.ptr) -> i32 +// CHECK: llvm.resume %[[lp]] : !llvm.struct<(ptr, i32)> ^bb1: - %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr>) (catch %6 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> - %11 = llvm.intr.eh.typeid.for %6 : (!llvm.ptr) -> i32 - llvm.resume %10 : !llvm.struct<(ptr, i32)> + %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr) (catch %4 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> + %11 = llvm.intr.eh.typeid.for %4 : (!llvm.ptr) -> i32 + llvm.resume %10 : !llvm.struct<(ptr, i32)> // CHECK: ^[[BB2]]: -// CHECK: llvm.return %[[a7]] : i32 +// CHECK: llvm.return %[[V4]] : i32 ^bb2: - llvm.return %7 : i32 + llvm.return %5 : i32 // CHECK: ^[[BB3:.*]]: -// CHECK: llvm.invoke @bar(%[[a8]], %[[a6]], %[[a4]]) to ^[[BB2]] unwind ^[[BB1]] : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () +// CHECK: llvm.invoke @bar(%[[V5]], %[[V3]], %[[V2]]) to ^[[BB2]] unwind ^[[BB1]] : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () 
^bb3: - llvm.invoke @bar(%8, %6, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () + llvm.invoke @bar(%6, %4, %3) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () // CHECK: ^[[BB4:.*]]: // CHECK: %[[FUNC:.*]] = llvm.mlir.addressof @foo : !llvm.ptr // CHECK: %{{.*}} = llvm.invoke %[[FUNC]]{{.*}}: !llvm.ptr, ^bb4: %12 = llvm.mlir.addressof @foo : !llvm.ptr - %13 = llvm.invoke %12(%7) to ^bb2 unwind ^bb1 : !llvm.ptr, (i32) -> !llvm.struct<(i32, f64, i32)> + %13 = llvm.invoke %12(%5) to ^bb2 unwind ^bb1 : !llvm.ptr, (i32) -> !llvm.struct<(i32, f64, i32)> // CHECK: ^[[BB5:.*]]: -// CHECK: llvm.return %[[a0]] : i32 +// CHECK: llvm.return %[[V0]] : i32 ^bb5: llvm.return %0 : i32 } @@ -438,10 +431,14 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali func.func @useFreezeOp(%arg0: i32) { // CHECK: = llvm.freeze %[[ARG0:.*]] : i32 %0 = llvm.freeze %arg0 : i32 - // CHECK: %[[x:.*]] = llvm.mlir.undef : i8 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i8 %1 = llvm.mlir.undef : i8 - // CHECK: = llvm.freeze %[[x]] : i8 + // CHECK: = llvm.freeze %[[UNDEF]] : i8 %2 = llvm.freeze %1 : i8 + // CHECK: %[[POISON:.*]] = llvm.mlir.poison : i8 + %3 = llvm.mlir.poison : i8 + // CHECK: = llvm.freeze %[[POISON]] : i8 + %4 = llvm.freeze %3 : i8 return } @@ -524,32 +521,6 @@ func.func @fastmathFlags(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: vector<2 x f return } -// CHECK-LABEL: llvm.func @vararg_func -llvm.func @vararg_func(%arg0: i32, ...) { - // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 - %0 = llvm.mlir.constant(1 : i32) : i32 - %1 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[ALLOCA0:.+]] = llvm.alloca %{{.*}} x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> - // CHECK: %[[CAST0:.+]] = llvm.bitcast %[[ALLOCA0]] : !llvm.ptr)>> to !llvm.ptr - %2 = llvm.alloca %1 x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> - %3 = llvm.bitcast %2 : !llvm.ptr)>> to !llvm.ptr - // CHECK: llvm.intr.vastart %[[CAST0]] - llvm.intr.vastart %3 : !llvm.ptr - // CHECK: %[[ALLOCA1:.+]] = llvm.alloca %{{.*}} x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> - // CHECK: %[[CAST1:.+]] = llvm.bitcast %[[ALLOCA1]] : !llvm.ptr> to !llvm.ptr - %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> - %5 = llvm.bitcast %4 : !llvm.ptr> to !llvm.ptr - // CHECK: llvm.intr.vacopy %[[CAST0]] to %[[CAST1]] - llvm.intr.vacopy %3 to %5 : !llvm.ptr, !llvm.ptr - // CHECK: llvm.intr.vaend %[[CAST1]] - // CHECK: llvm.intr.vaend %[[CAST0]] - llvm.intr.vaend %5 : !llvm.ptr - llvm.intr.vaend %3 : !llvm.ptr - // CHECK: llvm.return - llvm.return -} - // CHECK-LABEL: @lifetime // CHECK-SAME: %[[P:.*]]: !llvm.ptr llvm.func @lifetime(%p: !llvm.ptr) { @@ -560,8 +531,8 @@ llvm.func @lifetime(%p: !llvm.ptr) { llvm.return } -// CHECK-LABEL: @vararg_func_opaque_pointers -llvm.func @vararg_func_opaque_pointers(%arg0: i32, ...) { +// CHECK-LABEL: @vararg_func +llvm.func @vararg_func(%arg0: i32, ...) { // CHECK: %[[C:.*]] = llvm.mlir.constant(1 : i32) // CHECK: %[[LIST:.*]] = llvm.alloca // CHECK: llvm.intr.vastart %[[LIST]] : !llvm.ptr{{$}} @@ -581,17 +552,17 @@ llvm.func @vararg_func_opaque_pointers(%arg0: i32, ...) 
{ llvm.return } -// CHECK-LABEL: @eh_typeid_opaque_pointers +// CHECK-LABEL: @eh_typeid // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr -llvm.func @eh_typeid_opaque_pointers(%arg0: !llvm.ptr) -> i32 { +llvm.func @eh_typeid(%arg0: !llvm.ptr) -> i32 { // CHECK: llvm.intr.eh.typeid.for %[[ARG0]] : (!llvm.ptr) -> i32 %0 = llvm.intr.eh.typeid.for %arg0 : (!llvm.ptr) -> i32 llvm.return %0 : i32 } -// CHECK-LABEL: @stackrestore_opaque_pointers +// CHECK-LABEL: @stackrestore // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr -llvm.func @stackrestore_opaque_pointers(%arg0: !llvm.ptr) { +llvm.func @stackrestore(%arg0: !llvm.ptr) { // CHECK: llvm.intr.stackrestore %[[ARG0]] : !llvm.ptr llvm.intr.stackrestore %arg0 : !llvm.ptr llvm.return diff --git a/mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir new file mode 100644 index 0000000000000..475fadede8fbf --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-opt --allow-unregistered-dialect -split-input-file -verify-diagnostics %s + +func.func @void_pointer() { + // expected-error @+1 {{invalid pointer element type}} + "some.op"() : () -> !llvm.ptr +} + +// ----- + +func.func @repeated_struct_name() { + "some.op"() : () -> !llvm.struct<"a", (ptr>)> + // expected-error @+1 {{identified type already used with a different body}} + "some.op"() : () -> !llvm.struct<"a", (i32)> +} + +// ----- + +func.func @dynamic_vector() { + // expected-error @+1 {{expected '? x x ' or ' x '}} + "some.op"() : () -> !llvm.vec> +} + +// ----- + +func.func @dynamic_scalable_vector() { + // expected-error @+1 {{expected '? x x ' or ' x '}} + "some.op"() : () -> !llvm.vec> +} + +// ----- + +func.func @unscalable_vector() { + // expected-error @+1 {{expected '? x x ' or ' x '}} + "some.op"() : () -> !llvm.vec<4x4 x ptr> +} + +// ----- + +func.func @zero_vector() { + // expected-error @+1 {{the number of vector elements must be positive}} + "some.op"() : () -> !llvm.vec<0 x ptr> +} diff --git a/mlir/test/Dialect/LLVMIR/types-invalid.mlir b/mlir/test/Dialect/LLVMIR/types-invalid.mlir index d8ac523b86d93..fce100e6a865c 100644 --- a/mlir/test/Dialect/LLVMIR/types-invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/types-invalid.mlir @@ -21,15 +21,8 @@ func.func @function_taking_function() { // ----- -func.func @void_pointer() { - // expected-error @+1 {{invalid pointer element type}} - "some.op"() : () -> !llvm.ptr -} - -// ----- - func.func @repeated_struct_name() { - "some.op"() : () -> !llvm.struct<"a", (ptr>)> + "some.op"() : () -> !llvm.struct<"a", (ptr)> // expected-error @+1 {{identified type already used with a different body}} "some.op"() : () -> !llvm.struct<"a", (i32)> } @@ -113,28 +106,28 @@ func.func @identified_struct_with_void() { func.func @dynamic_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec> + "some.op"() : () -> !llvm.vec } // ----- func.func @dynamic_scalable_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec> + "some.op"() : () -> !llvm.vec } // ----- func.func @unscalable_vector() { // expected-error @+1 {{expected '? 
x x ' or ' x '}} - "some.op"() : () -> !llvm.vec<4x4 x ptr> + "some.op"() : () -> !llvm.vec<4x4 x ptr> } // ----- func.func @zero_vector() { // expected-error @+1 {{the number of vector elements must be positive}} - "some.op"() : () -> !llvm.vec<0 x ptr> + "some.op"() : () -> !llvm.vec<0 x ptr> } // ----- diff --git a/mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir new file mode 100644 index 0000000000000..2d63f379c2ee7 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir @@ -0,0 +1,118 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file | mlir-opt -allow-unregistered-dialect | FileCheck %s + +// CHECK-LABEL: @ptr +func.func @ptr() { + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr> + "some.op"() : () -> !llvm.ptr> + // CHECK: !llvm.ptr>>>> + "some.op"() : () -> !llvm.ptr>>>> + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr, 9> + "some.op"() : () -> !llvm.ptr, 9> + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr<42> + "some.op"() : () -> !llvm.ptr<42> + return +} + +// CHECK-LABEL: @vec +func.func @vec() { + // CHECK: vector<4xi32> + "some.op"() : () -> vector<4xi32> + // CHECK: vector<4xf32> + "some.op"() : () -> vector<4xf32> + // CHECK: !llvm.vec + "some.op"() : () -> !llvm.vec + // CHECK: !llvm.vec + "some.op"() : () -> !llvm.vec + // CHECK: !llvm.vec<4 x ptr> + "some.op"() : () -> !llvm.vec<4 x ptr> + return +} + +// CHECK-LABEL: @array +func.func @array() { + // CHECK: !llvm.array<10 x i32> + "some.op"() : () -> !llvm.array<10 x i32> + // CHECK: !llvm.array<8 x f32> + "some.op"() : () -> !llvm.array<8 x f32> + // CHECK: !llvm.array<10 x ptr> + "some.op"() : () -> !llvm.array<10 x ptr> + // CHECK: !llvm.array<10 x array<4 x f32>> + "some.op"() : () -> !llvm.array<10 x array<4 x f32>> + return +} + +// CHECK-LABEL: @identified_struct +func.func @identified_struct() { + // CHECK: !llvm.struct<"empty", ()> + "some.op"() : () -> !llvm.struct<"empty", ()> + // CHECK: !llvm.struct<"opaque", opaque> + "some.op"() : () -> !llvm.struct<"opaque", opaque> + // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> + "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> + // CHECK: !llvm.struct<"self-recursive", (ptr>)> + "some.op"() : () -> !llvm.struct<"self-recursive", (ptr>)> + // CHECK: !llvm.struct<"unpacked", (i32)> + "some.op"() : () -> !llvm.struct<"unpacked", (i32)> + // CHECK: !llvm.struct<"packed", packed (i32)> + "some.op"() : () -> !llvm.struct<"packed", packed (i32)> + // CHECK: !llvm.struct<"name with spaces and !^$@$#", packed (i32)> + "some.op"() : () -> !llvm.struct<"name with spaces and !^$@$#", packed (i32)> + + // CHECK: !llvm.struct<"mutually-a", (ptr, 3>)>>)> + "some.op"() : () -> !llvm.struct<"mutually-a", (ptr, 3>)>>)> + // CHECK: !llvm.struct<"mutually-b", (ptr>)>, 3>)> + "some.op"() : () -> !llvm.struct<"mutually-b", (ptr>)>, 3>)> + // CHECK: !llvm.struct<"referring-another", (ptr>)> + "some.op"() : () -> !llvm.struct<"referring-another", (ptr>)> + + // CHECK: !llvm.struct<"struct-of-arrays", (array<10 x i32>)> + "some.op"() : () -> !llvm.struct<"struct-of-arrays", (array<10 x i32>)> + // CHECK: !llvm.array<10 x struct<"array-of-structs", (i32)>> + "some.op"() : () -> !llvm.array<10 x 
struct<"array-of-structs", (i32)>> + // CHECK: !llvm.ptr> + "some.op"() : () -> !llvm.ptr> + return +} + +// CHECK-LABEL: @ptr_elem_interface +// CHECK-COUNT-3: !llvm.ptr +// CHECK: llvm.mlir.undef : !llvm.ptr +func.func @ptr_elem_interface(%arg0: !llvm.ptr) { + %0 = llvm.load %arg0 : !llvm.ptr + llvm.store %0, %arg0 : !llvm.ptr + llvm.mlir.undef : !llvm.ptr + return +} + +// ----- + +// Check that type aliases can be used inside LLVM dialect types. Note that +// currently they are _not_ printed back as this would require +// DialectAsmPrinter to have a mechanism for querying the presence and +// usability of an alias outside of its `printType` method. + +!baz = i64 +!qux = !llvm.struct<(!baz)> + +!rec = !llvm.struct<"a", (ptr>)> + +// CHECK: aliases +llvm.func @aliases() { + // CHECK: !llvm.struct<(i32, f32, struct<(i64)>)> + "some.op"() : () -> !llvm.struct<(i32, f32, !qux)> + // CHECK: !llvm.struct<"a", (ptr>)> + "some.op"() : () -> !rec + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/types.mlir b/mlir/test/Dialect/LLVMIR/types.mlir index 54c44a6aa58ab..42352ce697f02 100644 --- a/mlir/test/Dialect/LLVMIR/types.mlir +++ b/mlir/test/Dialect/LLVMIR/types.mlir @@ -57,26 +57,14 @@ func.func @integer() { // CHECK-LABEL: @ptr func.func @ptr() { - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr> - "some.op"() : () -> !llvm.ptr> - // CHECK: !llvm.ptr>>>> - "some.op"() : () -> !llvm.ptr>>>> - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr, 9> - "some.op"() : () -> !llvm.ptr, 9> // CHECK: !llvm.ptr "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr<0> // CHECK: !llvm.ptr<42> "some.op"() : () -> !llvm.ptr<42> + // CHECK: !llvm.ptr, 9> + "some.op"() : () -> !llvm.ptr, 9> return } @@ -90,8 +78,8 @@ func.func @vec() { "some.op"() : () -> !llvm.vec // CHECK: !llvm.vec "some.op"() : () -> !llvm.vec - // CHECK: !llvm.vec<4 x ptr> - "some.op"() : () -> !llvm.vec<4 x ptr> + // CHECK: !llvm.vec<4 x ptr> + "some.op"() : () -> !llvm.vec<4 x ptr> return } @@ -101,8 +89,8 @@ func.func @array() { "some.op"() : () -> !llvm.array<10 x i32> // CHECK: !llvm.array<8 x f32> "some.op"() : () -> !llvm.array<8 x f32> - // CHECK: !llvm.array<10 x ptr> - "some.op"() : () -> !llvm.array<10 x ptr> + // CHECK: !llvm.array<10 x ptr<4>> + "some.op"() : () -> !llvm.array<10 x ptr<4>> // CHECK: !llvm.array<10 x array<4 x f32>> "some.op"() : () -> !llvm.array<10 x array<4 x f32>> return @@ -147,30 +135,22 @@ func.func @identified_struct() { "some.op"() : () -> !llvm.struct<"empty", ()> // CHECK: !llvm.struct<"opaque", opaque> "some.op"() : () -> !llvm.struct<"opaque", opaque> - // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> - "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> - // CHECK: !llvm.struct<"self-recursive", (ptr>)> - "some.op"() : () -> !llvm.struct<"self-recursive", (ptr>)> + // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr)> + "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr)> // CHECK: !llvm.struct<"unpacked", (i32)> "some.op"() : () -> !llvm.struct<"unpacked", (i32)> // CHECK: !llvm.struct<"packed", packed (i32)> "some.op"() : () -> !llvm.struct<"packed", packed (i32)> // CHECK: !llvm.struct<"name with spaces and !^$@$#", packed (i32)> "some.op"() : () -> !llvm.struct<"name 
with spaces and !^$@$#", packed (i32)> - - // CHECK: !llvm.struct<"mutually-a", (ptr, 3>)>>)> - "some.op"() : () -> !llvm.struct<"mutually-a", (ptr, 3>)>>)> - // CHECK: !llvm.struct<"mutually-b", (ptr>)>, 3>)> - "some.op"() : () -> !llvm.struct<"mutually-b", (ptr>)>, 3>)> - // CHECK: !llvm.struct<"referring-another", (ptr>)> - "some.op"() : () -> !llvm.struct<"referring-another", (ptr>)> - + // CHECK: !llvm.struct<"outer", (struct<"nested", ()>)> + "some.op"() : () -> !llvm.struct<"outer", (struct<"nested", ()>)> + // CHECK: !llvm.struct<"referring-another", (ptr)> + "some.op"() : () -> !llvm.struct<"referring-another", (ptr)> // CHECK: !llvm.struct<"struct-of-arrays", (array<10 x i32>)> "some.op"() : () -> !llvm.struct<"struct-of-arrays", (array<10 x i32>)> // CHECK: !llvm.array<10 x struct<"array-of-structs", (i32)>> "some.op"() : () -> !llvm.array<10 x struct<"array-of-structs", (i32)>> - // CHECK: !llvm.ptr> - "some.op"() : () -> !llvm.ptr> return } @@ -180,16 +160,6 @@ func.func @verbose() { return } -// CHECK-LABEL: @ptr_elem_interface -// CHECK-COUNT-3: !llvm.ptr -// CHECK: llvm.mlir.undef : !llvm.ptr -func.func @ptr_elem_interface(%arg0: !llvm.ptr) { - %0 = llvm.load %arg0 : !llvm.ptr - llvm.store %0, %arg0 : !llvm.ptr - llvm.mlir.undef : !llvm.ptr - return -} - // ----- // Check that type aliases can be used inside LLVM dialect types. Note that @@ -200,13 +170,9 @@ func.func @ptr_elem_interface(%arg0: !llvm.ptr) { !baz = i64 !qux = !llvm.struct<(!baz)> -!rec = !llvm.struct<"a", (ptr>)> - // CHECK: aliases llvm.func @aliases() { // CHECK: !llvm.struct<(i32, f32, struct<(i64)>)> "some.op"() : () -> !llvm.struct<(i32, f32, !qux)> - // CHECK: !llvm.struct<"a", (ptr>)> - "some.op"() : () -> !rec llvm.return } diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir index e33e51ddababb..ffcba1086f3f6 100644 --- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir +++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir @@ -29,36 +29,71 @@ transform.sequence failures(propagate) { // CHECK: IR printer: tensor_producer // CHECK-NEXT: %[[COL_TENSOR:.+]] = linalg.generic -// CHECK-SAME: affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d3, d2 + d4, d5)>, -// CHECK-SAME: affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>] -// CHECK: ^bb0(%[[IN_DATA:.+]]: f32, %[[OUT_DATA:.+]]: f32) -// CHECK: linalg.yield %[[IN_DATA]] : f32 +// CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d1, d2)>] +// CHECK: ^bb0(%[[OUT_DATA:.+]]: f32) + +// Collapsed indices. +// CHECK: %[[BINDEX:.+]] = linalg.index 0 : index +// CHECK: %[[MINDEX:.+]] = linalg.index 1 : index +// CHECK: %[[KINDEX:.+]] = linalg.index 2 : index + +// Unrolled output shape indices. +// CHECK: %[[C14:.+]] = arith.constant 14 : index +// CHECK: %[[OWINDEX:.+]] = arith.remui %[[MINDEX]], %[[C14]] : index +// CHECK: %[[C14_1:.+]] = arith.constant 14 : index +// CHECK: %[[OHINDEX:.+]] = arith.divui %[[MINDEX]], %[[C14_1]] : index + +// Unrolled filter shape indices. 
+// CHECK: %[[C4:.+]] = arith.constant 4 : index +// CHECK: %[[ICINDEX:.+]] = arith.remui %[[KINDEX]], %[[C4]] : index +// CHECK: %[[C12:.+]] = arith.constant 12 : index +// CHECK: %[[FWREM:.+]] = arith.remui %[[KINDEX]], %[[C12]] : index +// CHECK: %[[C4_2:.+]] = arith.constant 4 : index +// CHECK: %[[FWINDEX:.+]] = arith.divui %[[FWREM]], %[[C4_2]] : index +// CHECK: %[[C12_3:.+]] = arith.constant 12 : index +// CHECK: %[[FHINDEX:.+]] = arith.divui %[[KINDEX]], %[[C12_3]] : index + +// Compute input indices. +// CHECK: %[[SH:.+]] = arith.constant 1 : index +// CHECK: %[[STRIDEDOH:.+]] = arith.muli %[[OHINDEX]], %[[SH]] : index +// CHECK: %[[CONVH:.+]] = arith.addi %[[STRIDEDOH]], %[[FHINDEX]] : index +// CHECK: %[[SW:.+]] = arith.constant 1 : index +// CHECK: %[[STRIDEDOW:.+]] = arith.muli %[[OWINDEX]], %[[SW]] : index +// CHECK: %[[CONVW:.+]] = arith.addi %[[STRIDEDOW]], %[[FWINDEX]] : index +// CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract +// CHECK-SAME: %{{.+}}{{\[}}%[[BINDEX]], %[[CONVH]], %[[CONVW]], %[[ICINDEX]]] : tensor<1x16x16x4xf32> +// CHECK: linalg.yield %[[EXTRACTED_INPUT]] : f32 // CHECK: IR printer: transformed -// CHECK: tensor.expand_shape %{{[^ ]*}} {{\[}}[0, 1, 2], [3]] : tensor<196x16xf32> into tensor<1x14x14x16xf32> +// CHECK: tensor.expand_shape %{{[^ ]*}} {{\[}}[0], [1, 2], [3]] : tensor<1x196x16xf32> into tensor<1x14x14x16xf32> -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d3, d2 + d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> // CHECK: @conv_16433136 -// CHECK: %[[INPUT:.+]]: tensor<1x16x16x4xf32> -// CHECK: %[[FILTER:.+]]: tensor<3x3x4x16xf32> -// CHECK: %[[OUTPUT:.+]]: tensor<1x14x14x16xf32> -// CHECK: %[[INIT_COL_TENSOR:.+]] = tensor.empty() : tensor<1x14x14x3x3x4xf32> +// CHECK-SAME: %[[INPUT:.+]]: tensor<1x16x16x4xf32> +// CHECK-SAME: %[[FILTER:.+]]: tensor<3x3x4x16xf32> +// CHECK-SAME: %[[OUTPUT:.+]]: tensor<1x14x14x16xf32> +// CHECK-DAG: %[[COLLAPSED_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> +// CHECK-DAG: %[[COLLAPSED_OUT:.+]] = tensor.collapse_shape %[[OUTPUT]] {{\[}}[0], [1, 2], [3]] : tensor<1x14x14x16xf32> into tensor<1x196x16xf32> +// CHECK: %[[INIT_COL_TENSOR:.+]] = tensor.empty() : tensor<1x196x36xf32> // CHECK: %[[COL_TENSOR:.+]] = linalg.generic // CHECK-SAME: #[[MAP0]] +// CHECK: ^bb0(%[[OUT_DATA:.+]]: f32) +// CHECK: linalg.yield %{{.+}} : f32 +// CHECK: %[[MATMUL_RESULT:.+]] = linalg.generic // CHECK-SAME: #[[MAP1]] -// CHECK: ^bb0(%[[IN_DATA:.+]]: f32, %[[OUT_DATA:.+]]: f32) -// CHECK: linalg.yield %[[IN_DATA]] : f32 -// CHECK-DAG: %[[RESHAPED_INIT_COL_TENSOR:.+]] = tensor.collapse_shape %[[COL_TENSOR]] -// CHECK-SAME: [0, 1, 2], [3, 4, 5] -// CHECK-SAME: tensor<1x14x14x3x3x4xf32> into tensor<196x36xf32> -// CHECK-DAG: %[[RESHAPED_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] -// CHECK-SAME: [0, 1, 2], [3] -// CHECK-SAME: tensor<3x3x4x16xf32> into tensor<36x16xf32> -// CHECK-DAG: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]] -// CHECK-SAME: [0, 1, 2], [3] -// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INIT_COL_TENSOR]], %[[RESHAPED_FILTER]] : 
tensor<196x36xf32>, tensor<36x16xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<196x16xf32>) -// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1, 2], [3]] : tensor<196x16xf32> into tensor<1x14x14x16xf32> +// CHECK-SAME: #[[MAP2]] +// CHECK-SAME: #[[MAP3]] +// CHECK-SAME: ins(%[[COL_TENSOR]], %[[COLLAPSED_FILTER]] : tensor<1x196x36xf32>, tensor<36x16xf32>) +// CHECK-SAME: outs(%[[COLLAPSED_OUT]] : tensor<1x196x16xf32>) +// CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32) +// CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 +// CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 +// CHECK: linalg.yield %[[ADD]] : f32 +// CHECK: } -> tensor<1x196x16xf32> +// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0], [1, 2], [3]] : tensor<1x196x16xf32> into tensor<1x14x14x16xf32> // CHECK: return %[[RESULT]] func.func @conv_16433136(%arg0: tensor<1x16x16x4xf32>, %arg1: tensor<3x3x4x16xf32>, %arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32> { @@ -156,27 +191,24 @@ transform.sequence failures(propagate) { // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d3, d2 + d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[LHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> // CHECK-DAG: #[[RHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> // CHECK-DAG: #[[RESMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> // CHECK: func.func @batch_nhwc_conv // CHECK-SAME: (%[[INPUT:.+]]: tensor<8x16x16x4xf32>, %[[FILTER:.+]]: tensor<3x3x4x16xf32>, %[[INIT:.+]]: tensor<8x14x14x16xf32>) -// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x14x14x3x3x4xf32> +// CHECK-DAG: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> +// CHECK-DAG: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3]] : tensor<8x14x14x16xf32> into tensor<8x196x16xf32> +// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x196x36xf32> // CHECK: %[[IMG2COL:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[INPUT]] : tensor<8x16x16x4xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x14x14x3x3x4xf32>) -// CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2], [3, 4, 5]] : tensor<8x14x14x3x3x4xf32> into tensor<8x196x36xf32> -// CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> -// CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3]] : tensor<8x14x14x16xf32> into tensor<8x196x16xf32> +// CHECK-SAME: indexing_maps = [#[[MAP]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] +// CHECK-SAME: outs(%[[IT]] : tensor<8x196x36xf32>) // CHECK: %[[MATMUL:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] -// CHECK-SAME: ins(%[[CS_INPUT]], %[[CS_FILTER]] : tensor<8x196x36xf32>, tensor<36x16xf32>) +// CHECK-SAME: ins(%[[IMG2COL]], %[[CS_FILTER]] : tensor<8x196x36xf32>, tensor<36x16xf32>) // CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x196x16xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, 
%[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 @@ -201,27 +233,55 @@ transform.sequence failures(propagate) { // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4 + d2, d5 + d3)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[LHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> // CHECK-DAG: #[[RHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> // CHECK-DAG: #[[RESMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> // CHECK: func.func @batch_nchw_conv // CHECK-SAME: (%[[INPUT:.+]]: tensor<8x4x16x16xf32>, %[[FILTER:.+]]: tensor<16x4x3x3xf32>, %[[INIT:.+]]: tensor<8x16x14x14xf32>) -// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x4x3x3x14x14xf32> +// CHECK-DAG: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 3]] : tensor<16x4x3x3xf32> into tensor<16x36xf32> +// CHECK-DAG: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]] : tensor<8x16x14x14xf32> into tensor<8x16x196xf32> +// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x36x196xf32> // CHECK: %[[IMG2COL:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[INPUT]] : tensor<8x4x16x16xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x4x3x3x14x14xf32>) -// CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 3]] : tensor<16x4x3x3xf32> into tensor<16x36xf32> -// CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2, 3], [4, 5]] : tensor<8x4x3x3x14x14xf32> into tensor<8x36x196xf32> -// CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]] : tensor<8x16x14x14xf32> into tensor<8x16x196xf32> +// CHECK-SAME: indexing_maps = [#[[MAP]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] +// CHECK-SAME: outs(%[[IT]] : tensor<8x36x196xf32>) +// Collapsed indices. +// CHECK: %[[BINDEX:.+]] = linalg.index 0 : index +// CHECK: %[[KINDEX:.+]] = linalg.index 1 : index +// CHECK: %[[NINDEX:.+]] = linalg.index 2 : index + +// Unrolled filter shape indices. +// CHECK: %[[C3:.+]] = arith.constant 3 : index +// CHECK: %[[FWINDEX:.+]] = arith.remui %[[KINDEX]], %[[C3]] : index +// CHECK: %[[C9:.+]] = arith.constant 9 : index +// CHECK: %[[FHREM:.+]] = arith.remui %[[KINDEX]], %[[C9]] : index +// CHECK: %[[C3_1:.+]] = arith.constant 3 : index +// CHECK: %[[FHINDEX:.+]] = arith.divui %[[FHREM]], %[[C3_1]] : index +// CHECK: %[[C9_2:.+]] = arith.constant 9 : index +// CHECK: %[[ICINDEX:.+]] = arith.divui %[[KINDEX]], %[[C9_2]] : index + +// Unrolled output shape indices. +// CHECK: %[[C14:.+]] = arith.constant 14 : index +// CHECK: %[[OWINDEX:.+]] = arith.remui %[[NINDEX]], %[[C14]] : index +// CHECK: %[[C14_3:.+]] = arith.constant 14 : index +// CHECK: %[[OHINDEX:.+]] = arith.divui %[[NINDEX]], %[[C14_3]] : index + +// Compute input indices. 
+// CHECK: %[[SH:.+]] = arith.constant 1 : index +// CHECK: %[[STRIDEDOH:.+]] = arith.muli %[[OHINDEX]], %[[SH]] : index +// CHECK: %[[CONVH:.+]] = arith.addi %[[STRIDEDOH]], %[[FHINDEX]] : index +// CHECK: %[[SW:.+]] = arith.constant 1 : index +// CHECK: %[[STRIDEDOW:.+]] = arith.muli %[[OWINDEX]], %[[SW]] : index +// CHECK: %[[CONVW:.+]] = arith.addi %[[STRIDEDOW]], %[[FWINDEX]] : index +// CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract +// CHECK-SAME: %[[INPUT]]{{\[}}%[[BINDEX]], %[[ICINDEX]], %[[CONVH]], %[[CONVW]]] : tensor<8x4x16x16xf32> +// CHECK: linalg.yield %[[EXTRACTED_INPUT]] : f32 // CHECK: %[[MATMUL:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] -// CHECK-SAME: ins(%[[CS_FILTER]], %[[CS_INPUT]] : tensor<16x36xf32>, tensor<8x36x196xf32>) +// CHECK-SAME: ins(%[[CS_FILTER]], %[[IMG2COL]] : tensor<16x36xf32>, tensor<8x36x196xf32>) // CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x16x196xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index bcbad20a2fd7a..a29f86eb4a263 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -6,7 +6,7 @@ func.func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 return %1 : f32 } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)> // CHECK: func @fold_static_stride_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -25,7 +25,7 @@ func.func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, strided<[?, ?], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // CHECK: func @fold_dynamic_stride_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -34,8 +34,8 @@ func.func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[ARG5]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[ARG6]]] // CHECK: memref.load %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -66,7 +66,7 @@ func.func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %ar memref.store %arg7, %0[%arg3, %arg4] : memref<4x4xf32, strided<[?, ?], offset: ?>> return } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // CHECK: func @fold_dynamic_stride_subview_with_store // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: 
%[[ARG1:[a-zA-Z0-9_]+]]: index @@ -75,8 +75,8 @@ func.func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %ar // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[ARG5]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[ARG6]]] // CHECK: memref.store %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -85,7 +85,7 @@ func.func @fold_subview_with_transfer_read_0d( %arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index) -> vector { %f1 = arith.constant 1.0 : f32 - %0 = memref.subview %arg0[%arg1, %arg2][1, 1][2, %arg3] : memref<12x32xf32> to memref> + %0 = memref.subview %arg0[%arg1, %arg2][1, 1][1, 1] : memref<12x32xf32> to memref> %1 = vector.transfer_read %0[], %f1 : memref>, vector return %1 : vector } @@ -100,22 +100,14 @@ func.func @fold_subview_with_transfer_read_0d( func.func @fold_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> vector<4xf32> { %f1 = arith.constant 1.0 : f32 + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, strided<[?, ?], offset: ?>> %1 = vector.transfer_read %0[%arg3, %arg4], %f1 {in_bounds = [true]} : memref<4x4xf32, strided<[?, ?], offset: ?>>, vector<4xf32> return %1 : vector<4xf32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_subview_with_transfer_read -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] -// CHECK: vector.transfer_read %[[ARG0]][%[[I1]], %[[I2]]] +// Can't fold this atm since we don't emit the proper vector.extract_strided_slice. 
+// CHECK: memref.subview // ----- @@ -123,7 +115,7 @@ func.func @fold_static_stride_subview_with_transfer_write_0d( %arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %v : vector) { %f1 = arith.constant 1.0 : f32 - %0 = memref.subview %arg0[%arg1, %arg2][1, 1][2, %arg3] : memref<12x32xf32> to memref> + %0 = memref.subview %arg0[%arg1, %arg2][1, 1][1, 1] : memref<12x32xf32> to memref> vector.transfer_write %v, %0[] {in_bounds = []} : vector, memref> return } @@ -143,18 +135,9 @@ func.func @fold_static_stride_subview_with_transfer_write(%arg0 : memref<12x32xf vector.transfer_write %arg7, %0[%arg3, %arg4] {in_bounds = [true]} : vector<4xf32>, memref<4x4xf32, strided<[?, ?], offset: ?>> return } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_static_stride_subview_with_transfer_write -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] -// CHECK: vector.transfer_write %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] +// Can't fold this atm since we don't emit the proper vector.extract_strided_slice. +// CHECK: memref.subview // ----- @@ -168,7 +151,7 @@ func.func @fold_rank_reducing_subview_with_load %1 = memref.load %0[%arg13, %arg14, %arg15, %arg16] : memref<4x1x4x1xf32, strided<[?, ?, ?, ?], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // CHECK: func @fold_rank_reducing_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -187,10 +170,10 @@ func.func @fold_rank_reducing_subview_with_load // CHECK-SAME: %[[ARG14:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG15:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG16:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I0:.+]] = affine.apply #[[MAP]]()[%[[ARG7]], %[[ARG1]], %[[ARG13]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG9]], %[[ARG3]], %[[ARG14]]] -// CHECK-DAG: %[[I3:.+]] = affine.apply #[[MAP]]()[%[[ARG10]], %[[ARG4]], %[[ARG15]]] -// CHECK-DAG: %[[I4:.+]] = affine.apply #[[MAP]]()[%[[ARG11]], %[[ARG5]], %[[ARG16]]] +// CHECK-DAG: %[[I0:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG13]], %[[ARG7]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG3]], %[[ARG14]], %[[ARG9]]] +// CHECK-DAG: %[[I3:.+]] = affine.apply #[[MAP]]()[%[[ARG4]], %[[ARG15]], %[[ARG10]]] +// CHECK-DAG: %[[I4:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG16]], %[[ARG11]]] // CHECK: memref.load %[[ARG0]][%[[I0]], %[[ARG2]], %[[I2]], %[[I3]], %[[I4]], %[[ARG6]]] // ----- diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir new file mode 100644 index 0000000000000..93a0d77bc698f --- /dev/null +++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir @@ -0,0 +1,262 @@ +// RUN: mlir-opt -fold-tensor-subset-ops -split-input-file %s | FileCheck %s + +func.func @fold_vector_transfer_read_with_rank_reduced_extract_slice( + %arg0 : tensor, + %arg1: index, %arg2 : index, %arg3 : index, %arg4: index, %arg5 
: index, + %arg6 : index) -> vector<4xf32> { + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %arg0[0, %arg1, %arg2] [1, %arg3, %arg4] [1, 1, 1] + : tensor to + tensor + %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = [true]} + : tensor, vector<4xf32> + return %1 : vector<4xf32> +} +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> +// CHECK: func @fold_vector_transfer_read_with_rank_reduced_extract_slice +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[$MAP1]]()[%[[ARG1]], %[[ARG5]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG6]]] +// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : tensor, + %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %f0 = arith.constant 0.000000e+00 : f32 + + // Can't fold this atm since we don' emit the proper vector.extract_strided_slice. +// CHECK: tensor.extract_slice + %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [2, 3, 4, 5] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32> + %1 = vector.transfer_read %0[%c1, %i4, %c2], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32> + return %1 : vector<4xf32> +} + +// ----- + +// CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +// CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true, true]} : tensor, vector<5x6xf32> +// CHECK: return %[[r]] +func.func @transfer_read_of_extract_slice(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor to tensor<10x?xf32> + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<10x?xf32>, vector<5x6xf32> + return %1 : vector<5x6xf32> +} +// ----- + +func.func @fold_extract_slice_with_transfer_read_0d( + %arg0 : tensor<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index) + -> vector { + %f1 = arith.constant 1.0 : f32 + %0 = tensor.extract_slice %arg0[%arg1, %arg2][1, 1][1, 1] : tensor<12x32xf32> to tensor + %1 = vector.transfer_read %0[], %f1 : tensor, vector + return %1 : vector +} +// CHECK: func @fold_extract_slice_with_transfer_read_0d +// CHECK-SAME: %[[T:[a-zA-Z0-9_]+]]: tensor<12x32xf32> +// CHECK-SAME: %[[SZ0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[SZ1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[ST1:[a-zA-Z0-9_]+]]: index +// CHECK: vector.transfer_read %[[T]][%[[SZ0]], %[[SZ1]]] + +// ----- + +// CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +// CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index +// CHECK: %[[add:.*]] = affine.apply 
#[[$ADD_4]]()[%[[s1]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true]} : tensor, vector<6xf32> +// CHECK: return %[[r]] +func.func @transfer_read_of_extract_slice(%t : tensor, %s1 : index, %s2 : index) -> vector<6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor to tensor<10x?xf32> + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true]} : tensor<10x?xf32>, vector<6xf32> + return %1 : vector<6xf32> +} + +// ----- + +// CHECK-DAG: #[[$ADD_3:.+]] = affine_map<()[s0] -> (s0 + 3)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice_rank_reducing( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +// CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_3]]()[%[[s1]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = [true, true]} : tensor, vector<5x6xf32> +// CHECK: return %[[r]] +func.func @transfer_read_of_extract_slice_rank_reducing(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %t[5, %s1, 6] [1, %s2, 12] [1, 1, 1] : tensor to tensor + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor, vector<5x6xf32> + return %1 : vector<5x6xf32> +} + +// ----- + +// CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)> +// CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice_swappy_rank_reducing( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +func.func @transfer_read_of_extract_slice_swappy_rank_reducing(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + +// CHECK-NOT: extract_slice +// CHECK: %[[c8:.*]] = arith.constant 8 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s2]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[s1]], %[[add]]] +// CHECK-SAME: permutation_map = #[[$d0d2]] +// CHECK-SAME: tensor, vector<5x6xf32> + %0 = tensor.extract_slice %t[5, %s1, %s2] [%s2, 1, 12] [1, 1, 1] : tensor to tensor + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor, vector<5x6xf32> + + return %1 : vector<5x6xf32> +} + +// ----- + +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> + +// CHECK: func @fold_vector_transfer_write_with_rank_reduced_insert_slice +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index +func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice( + %arg0 : tensor, + %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index, + %arg5: index, %arg6 : index, %arg7 : index, + %st : tensor) -> tensor { + %cst = arith.constant 0.0 : f32 + +// CHECK-NOT: insert_slice +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[IDX0:.+]] = affine.apply 
#[[MAP1]]()[%[[ARG2]], %[[ARG6]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]] +// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, tensor, tensor + %1 = tensor.insert_slice %0 into %arg0[0, %arg2, %arg3] [1, %arg4, %arg5] [1, 1, 1] + : tensor into tensor + return %1 : tensor +} + +// ----- + +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)> + +// CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index +func.func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice( + %arg0 : tensor, + %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index, + %arg5: index, %arg6 : index, %arg7 : index, + %st : tensor) -> tensor { + %cst = arith.constant 0.0 : f32 + + // CHECK-NOT: insert_slice + // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index + // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]] + // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]] + // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]] + // CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, tensor, tensor + %1 = tensor.insert_slice %0 into %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1] + : tensor into tensor + return %1 : tensor +} + +// ----- + +// CHECK-LABEL: func @insert_slice_of_transfer_write( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index +func.func @insert_slice_of_transfer_write(%t1 : tensor, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + + // CHECK-NOT: insert_slice +// CHECK: %[[c3:.*]] = arith.constant 3 : index +// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor +// CHECK: return %[[r]] + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[3, %s] [5, 6] [1, 1] : tensor<5x6xf32> into tensor + return %1 : tensor +} + +// ----- + +// CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> + +// CHECK-LABEL: func @insert_slice_of_transfer_write_swappy_rank_extending( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index +func.func @insert_slice_of_transfer_write_swappy_rank_extending( + %t1 : tensor, %v : vector<5x6xf32>, + %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + +// CHECK-NOT: insert_slice +// CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index +// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] +// CHECK-SAME: {in_bounds = [true, true], permutation_map = #[[$d0d2]]} : vector<5x6xf32>, tensor +// CHECK: return %[[r]] + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [5, 1, 6] [1, 1, 1] : 
tensor<5x6xf32> into tensor + return %1 : tensor +} + +// ----- + +// CHECK-LABEL: func @insert_slice_of_transfer_write_rank_extending( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index +// CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index +// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor +// CHECK: return %[[r]] +func.func @insert_slice_of_transfer_write_rank_extending(%t1 : tensor, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [1, 5, 6] [1, 1, 1] : tensor<5x6xf32> into tensor + return %1 : tensor +} diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index e16a614c7cd01..77627d8c8ba62 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -434,3 +434,56 @@ func.func @fold_resize_bilinear(%arg0 : tensor<1x15x13x1xi8>) -> tensor<1x15x13x %resize = "tosa.resize"(%arg0) {mode = "BILINEAR", scale = array, offset = array, border = array} : (tensor<1x15x13x1xi8>) -> tensor<1x15x13x1xi8> return %resize : tensor<1x15x13x1xi8> } + +// ----- + +// CHECK-LABEL: @canonicalize_concat_slice_final_axis +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12x1xf32>, %[[VAL_1:.*]]: tensor<1x12x12x1xf32> +// CHECK: return %[[VAL_0]], %[[VAL_1]] : tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32> +func.func @canonicalize_concat_slice_final_axis(%arg0 : tensor<1x12x12x1xf32>, %arg1 : tensor<1x12x12x1xf32>) -> (tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 3 : i64} : (tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32>) -> tensor<1x12x12x2xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x12x2xf32>) -> tensor<1x12x12x1xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x12x2xf32>) -> tensor<1x12x12x1xf32> + return %1, %2 : tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32> +} + +// ----- + +// CHECK-LABEL: @canonicalize_concat_slice_middle_axis +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12xf32>, %[[VAL_1:.*]]: tensor<1x12x12xf32> +// CHECK: return %[[VAL_0]], %[[VAL_1]] : tensor<1x12x12xf32>, tensor<1x12x12xf32> +func.func @canonicalize_concat_slice_middle_axis(%arg0 : tensor<1x12x12xf32>, %arg1 : tensor<1x12x12xf32>) -> (tensor<1x12x12xf32>, tensor<1x12x12xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 1 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x24x12xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x24x12xf32>) -> tensor<1x12x12xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x24x12xf32>) -> tensor<1x12x12xf32> + return %1, %2 : tensor<1x12x12xf32>, tensor<1x12x12xf32> +} + +// ----- + +// CHECK-LABEL: @canonicalize_cross_concat_inputs +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12xf32>, %[[VAL_1:.*]]: tensor<1x12x12xf32> +// CHECK: %[[VAL_2:.*]] = "tosa.concat"(%[[VAL_0]], %[[VAL_1]]) {axis = 2 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x12x24xf32> +// CHECK: %[[VAL_3:.*]] = "tosa.slice"(%[[VAL_2]]) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x15xf32> +// CHECK: %[[VAL_4:.*]] = "tosa.slice"(%[[VAL_2]]) {size = array, start = array} : (tensor<1x12x24xf32>) -> 
tensor<1x12x20xf32> +// CHECK: return %[[VAL_3]], %[[VAL_4]] : tensor<1x12x15xf32>, tensor<1x12x20xf32> +func.func @canonicalize_cross_concat_inputs(%arg0 : tensor<1x12x12xf32>, %arg1 : tensor<1x12x12xf32>) -> (tensor<1x12x15xf32>, tensor<1x12x20xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 2 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x12x24xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x15xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x20xf32> + return %1, %2 : tensor<1x12x15xf32>, tensor<1x12x20xf32> +} + +// ----- + +// CHECK-LABEL: @canonicalize_concat_slice_on_non_concat_axis +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12xf32>, %[[VAL_1:.*]]: tensor<1x12x12xf32> +// CHECK: %[[VAL_2:.*]] = "tosa.slice"(%[[VAL_0]]) {size = array, start = array} : (tensor<1x12x12xf32>) -> tensor<1x6x12xf32> +// CHECK: %[[VAL_3:.*]] = "tosa.slice"(%[[VAL_1]]) {size = array, start = array} : (tensor<1x12x12xf32>) -> tensor<1x3x12xf32> +// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x6x12xf32>, tensor<1x3x12xf32> +func.func @canonicalize_concat_slice_on_non_concat_axis(%arg0 : tensor<1x12x12xf32>, %arg1 : tensor<1x12x12xf32>) -> (tensor<1x6x12xf32>, tensor<1x3x12xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 2 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x12x24xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x6x12xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x3x12xf32> + return %1, %2 : tensor<1x6x12xf32>, tensor<1x3x12xf32> +} diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 60e1507293f7e..4013d5daee8cc 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -291,6 +291,18 @@ func.func @contraction_to_scalar(%arg0: vector<10xf32>, %arg1: vector<10xf32>) - return %0 : f32 } +// CHECK-LABEL: @contraction_extra_attrs +func.func @contraction_extra_attrs(%arg0: vector<10xf32>, %arg1: vector<10xf32>) -> f32 { + // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 + %f0 = arith.constant 0.0: f32 + // CHECK: %[[X:.*]] = vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["reduction"], kind = #vector.kind} %{{.*}}, %{{.*}}, %[[C0]] {first_attr = 1 : i32, second_attr = "string"} : vector<10xf32>, vector<10xf32> into f32 + %0 = vector.contract #contraction_to_scalar_trait %arg0, %arg1, %f0 + {first_attr = 1 : i32, second_attr = "string"} + : vector<10xf32>, vector<10xf32> into f32 + // CHECK: return %[[X]] : f32 + return %0 : f32 +} + #contraction_to_scalar_max_accesses = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>, diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 2cbd604759edc..e3f86ee0b39bc 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -1,7 +1,6 @@ // RUN: mlir-opt %s -test-vector-contraction-lowering | FileCheck %s // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT -// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-filter-outerproduct=1 | FileCheck %s --check-prefix=FILTEROUTERPRODUCT // 
RUN: mlir-opt %s -test-vector-contraction-lowering=vector-parallel-arith=1 | FileCheck %s --check-prefix=PARALLEL #dotp_accesses = [ @@ -1182,32 +1181,6 @@ func.func @matmul_7(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vecto return %0 : vector<3x2xf32> } -// FILTEROUTERPRODUCT-LABEL: func @matmul_4_filtered -// FILTEROUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<4x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<4x4xf32> -// FILTEROUTERPRODUCT: %[[c0:.*]] = vector.contract {{{.*}}} %[[A]], %[[B]], %[[C]] -func.func @matmul_4_filtered(%arg0: vector<4x4xf32>, %arg1: vector<4x4xf32>, %arg2: vector<4x4xf32>) --> vector<4x4xf32> -{ - %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2 - : vector<4x4xf32>, vector<4x4xf32> into vector<4x4xf32> - return %0 : vector<4x4xf32> -} - -// FILTEROUTERPRODUCT-LABEL: func @matmul_4_not_filtered -// FILTEROUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x4xf32> -// FILTEROUTERPRODUCT: %[[c0:.*]] = vector.contract {{{.*}}} %[[A]], %[[B]], %[[C]] -func.func @matmul_4_not_filtered(%arg0: vector<3x4xf32>, %arg1: vector<4x4xf32>, %arg2: vector<3x4xf32>) --> vector<3x4xf32> -{ - %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2 - : vector<3x4xf32>, vector<4x4xf32> into vector<3x4xf32> - return %0 : vector<3x4xf32> -} - // PARALLEL-LABEL: func @parrallel_contract_lowering // PARALLEL: %[[E0:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> // PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir new file mode 100644 index 0000000000000..3fc008705f111 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir @@ -0,0 +1,68 @@ +// Check that the wide integer `arith.sitofp` emulation produces the same result as wide +// `arith.sitofp`. Emulate i32 ops with i16 ops. + +// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=16" \ +// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// Ops in this function *only* will be emulated using i16 types. 
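Note (not part of the patch): the sitofp checks below compare a direct i32 -> f32 conversion against the version rewritten by --test-arith-emulate-wide-int. A minimal C++ sketch of the arithmetic identity the test relies on, under the assumption that the emulation can be reasoned about as a split into a signed high i16 and an unsigned low i16 (the actual lowering lives in the pass, not here); the sample values mirror the ones checked below.

// Standalone sketch, assuming a hi/lo 16-bit split; not the pass's code.
#include <cassert>
#include <cstdint>

static float sitofp_via_halves(int32_t x) {
  int16_t hi = static_cast<int16_t>(x >> 16);      // signed high 16 bits
  uint16_t lo = static_cast<uint16_t>(x & 0xFFFF); // unsigned low 16 bits
  // float(hi) and float(lo) are exact, and hi * 65536.0f is an exact power-of-two
  // scaling, so the single rounded addition matches the single rounding that a
  // direct i32 -> f32 conversion performs.
  return static_cast<float>(hi) * 65536.0f + static_cast<float>(lo);
}

int main() {
  const int32_t samples[] = {0, 1, 2, 7, 1337, -1, -1337,
                             -32768, 16777217, -16777217};
  for (int32_t x : samples)
    assert(sitofp_via_halves(x) == static_cast<float>(x));
  return 0;
}
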
+func.func @emulate_sitofp(%arg: i32) -> f32 { + %res = arith.sitofp %arg : i32 to f32 + return %res : f32 +} + +func.func @check_sitofp(%arg : i32) -> () { + %res = func.call @emulate_sitofp(%arg) : (i32) -> (f32) + vector.print %res : f32 + return +} + +func.func @entry() { + %cst0 = arith.constant 0 : i32 + %cst1 = arith.constant 1 : i32 + %cst2 = arith.constant 2 : i32 + %cst7 = arith.constant 7 : i32 + %cst1337 = arith.constant 1337 : i32 + + %cst_n1 = arith.constant -1 : i32 + %cst_n13 = arith.constant -13 : i32 + %cst_n1337 = arith.constant -1337 : i32 + + %cst_i16_min = arith.constant -32768 : i32 + + %cst_f32_int_max = arith.constant 16777217 : i32 + %cst_f32_int_min = arith.constant -16777217 : i32 + + // CHECK: 0 + func.call @check_sitofp(%cst0) : (i32) -> () + // CHECK-NEXT: 1 + func.call @check_sitofp(%cst1) : (i32) -> () + // CHECK-NEXT: 2 + func.call @check_sitofp(%cst2) : (i32) -> () + // CHECK-NEXT: 7 + func.call @check_sitofp(%cst7) : (i32) -> () + // CHECK-NEXT: 1337 + func.call @check_sitofp(%cst1337) : (i32) -> () + // CHECK-NEXT: -1 + func.call @check_sitofp(%cst_n1) : (i32) -> () + // CHECK-NEXT: -1337 + func.call @check_sitofp(%cst_n1337) : (i32) -> () + + // CHECK-NEXT: -32768 + func.call @check_sitofp(%cst_i16_min) : (i32) -> () + // CHECK-NEXT: 1.6{{.+}}e+07 + func.call @check_sitofp(%cst_f32_int_max) : (i32) -> () + // CHECK-NEXT: -1.6{{.+}}e+07 + func.call @check_sitofp(%cst_f32_int_min) : (i32) -> () + + return +} diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir new file mode 100644 index 0000000000000..c3d7db0de6d20 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir @@ -0,0 +1,77 @@ +// Check that the wide integer `arith.uitofp` emulation produces the same result as wide +// `arith.uitofp`. Emulate i32 ops with i16 ops. + +// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=16" \ +// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// Ops in this function *only* will be emulated using i16 types. 
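Note (not part of the patch): in the uitofp test below, every negative i32 input is expected to print as roughly 4.2e+09 because arith.uitofp interprets the 32-bit pattern as unsigned. A short C++ sketch of that reinterpretation (illustration only, not the pass's code):

#include <cstdint>
#include <cstdio>

int main() {
  const int32_t negatives[] = {-1, -1337, -32768, -16777217};
  for (int32_t x : negatives) {
    uint32_t bits = static_cast<uint32_t>(x);     // two's-complement bits as unsigned
    float as_unsigned = static_cast<float>(bits); // unsigned-to-float, like uitofp
    std::printf("%d -> %u -> %e\n", x, bits, as_unsigned);
  }
  return 0;
}
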
+func.func @emulate_uitofp(%arg: i32) -> f32 { + %res = arith.uitofp %arg : i32 to f32 + return %res : f32 +} + +func.func @check_uitofp(%arg : i32) -> () { + %res = func.call @emulate_uitofp(%arg) : (i32) -> (f32) + vector.print %res : f32 + return +} + +func.func @entry() { + %cst0 = arith.constant 0 : i32 + %cst1 = arith.constant 1 : i32 + %cst2 = arith.constant 2 : i32 + %cst7 = arith.constant 7 : i32 + %cst1337 = arith.constant 1337 : i32 + %cst_i16_max = arith.constant 65535 : i32 + %cst_i16_overflow = arith.constant 65536 : i32 + + %cst_n1 = arith.constant -1 : i32 + %cst_n13 = arith.constant -13 : i32 + %cst_n1337 = arith.constant -1337 : i32 + + %cst_i16_min = arith.constant -32768 : i32 + + %cst_f32_int_max = arith.constant 16777217 : i32 + %cst_f32_int_min = arith.constant -16777217 : i32 + + // CHECK: 0 + func.call @check_uitofp(%cst0) : (i32) -> () + // CHECK-NEXT: 1 + func.call @check_uitofp(%cst1) : (i32) -> () + // CHECK-NEXT: 2 + func.call @check_uitofp(%cst2) : (i32) -> () + // CHECK-NEXT: 7 + func.call @check_uitofp(%cst7) : (i32) -> () + // CHECK-NEXT: 1337 + func.call @check_uitofp(%cst1337) : (i32) -> () + // CHECK-NEXT: 65535 + func.call @check_uitofp(%cst_i16_max) : (i32) -> () + // CHECK-NEXT: 65536 + func.call @check_uitofp(%cst_i16_overflow) : (i32) -> () + + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_n1) : (i32) -> () + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_n1337) : (i32) -> () + + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_i16_min) : (i32) -> () + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_i16_min) : (i32) -> () + // CHECK-NEXT: 1.6{{.+}}e+07 + func.call @check_uitofp(%cst_f32_int_max) : (i32) -> () + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_f32_int_min) : (i32) -> () + + return +} diff --git a/mlir/test/Target/LLVMIR/Import/instructions.ll b/mlir/test/Target/LLVMIR/Import/instructions.ll index cbdb0ebe295ef..3f5ade4f15735 100644 --- a/mlir/test/Target/LLVMIR/Import/instructions.ll +++ b/mlir/test/Target/LLVMIR/Import/instructions.ll @@ -523,10 +523,13 @@ define void @gep_dynamic_idx(ptr %ptr, i32 %idx) { ; CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] define void @freeze(i32 %arg1) { ; CHECK: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : i64 + ; CHECK: %[[POISON:[0-9]+]] = llvm.mlir.poison : i16 ; CHECK: llvm.freeze %[[ARG1]] : i32 ; CHECK: llvm.freeze %[[UNDEF]] : i64 + ; CHECK: llvm.freeze %[[POISON]] : i16 %1 = freeze i32 %arg1 %2 = freeze i64 undef + %3 = freeze i16 poison ret void } diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 46120cb348296..6d340bc57fcd1 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1618,6 +1618,9 @@ llvm.func @callFreezeOp(%x : i32) { %1 = llvm.mlir.undef : i32 // CHECK: freeze i32 undef %2 = llvm.freeze %1 : i32 + %3 = llvm.mlir.poison : i32 + // CHECK: freeze i32 poison + %4 = llvm.freeze %3 : i32 llvm.return } diff --git a/mlir/test/Transforms/memref-bound-check.mlir b/mlir/test/Transforms/memref-bound-check.mlir index fce6bdbca4aa1..80909abee51d6 100644 --- a/mlir/test/Transforms/memref-bound-check.mlir +++ b/mlir/test/Transforms/memref-bound-check.mlir @@ -201,7 +201,7 @@ func.func @out_of_bounds() { // This test case accesses within bounds. 
Without removal of a certain type of // trivially redundant constraints (those differing only in their constant // term), the number of constraints here explodes, and this would return out of -// bounds errors conservatively due to FlatAffineConstraints::kExplosionFactor. +// bounds errors conservatively due to IntegerRelation::kExplosionFactor. #map3 = affine_map<(d0, d1) -> ((d0 * 72 + d1) floordiv 2304 + ((((d0 * 72 + d1) mod 2304) mod 1152) mod 9) floordiv 3)> #map4 = affine_map<(d0, d1) -> ((d0 * 72 + d1) mod 2304 - (((d0 * 72 + d1) mod 2304) floordiv 1152) * 1151 - ((((d0 * 72 + d1) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 * 72 + d1) mod 2304) mod 1152) mod 9) floordiv 3) * 3)> #map5 = affine_map<(d0, d1) -> (((((d0 * 72 + d1) mod 2304) mod 1152) floordiv 9) floordiv 8)> diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir index 3a16a33a1ed11..f272277cc7904 100644 --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -636,7 +636,7 @@ func.func @mod_deps() { affine.for %i0 = 0 to 10 { %a0 = affine.apply affine_map<(d0) -> (d0 mod 2)> (%i0) // Results are conservative here since we currently don't have a way to - // represent strided sets in FlatAffineConstraints. + // represent strided sets in FlatAffineValueConstraints. %v0 = affine.load %m[%a0] : memref<100xf32> // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}} // expected-remark@above {{dependence from 0 to 0 at depth 2 = false}} diff --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp index 61428bbf7091f..b31dd3f7d866f 100644 --- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp +++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp @@ -215,6 +215,9 @@ void VectorizerTestPass::testComposeMaps(llvm::raw_ostream &outs) { .getValue(); maps.push_back(map); } + if (maps.empty()) + // Nothing to compose + return; AffineMap res; for (auto m : maps) { res = res ? 
res.compose(m) : m; diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index 5a21bff0b39c3..7a4f9cf5e5101 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -22,6 +22,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorDistribution.h" #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" @@ -136,11 +137,6 @@ struct TestVectorContractionLowering *this, "vector-outerproduct", llvm::cl::desc("Lower vector.contract to vector.outerproduct"), llvm::cl::init(false)}; - Option lowerToFilterOuterProduct{ - *this, "vector-filter-outerproduct", - llvm::cl::desc("Lower vector.contract to vector.outerproduct but not for " - "vectors of size 4."), - llvm::cl::init(false)}; Option lowerToParallelArith{ *this, "vector-parallel-arith", llvm::cl::desc("Lower vector.contract to elementwise vector ops."), @@ -153,24 +149,9 @@ struct TestVectorContractionLowering if (lowerToOuterProduct) { VectorContractLowering lowering = VectorContractLowering::OuterProduct; VectorTransformsOptions options{lowering}; - patterns.add(options, - &getContext()); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); - return; - } - - // Test on one pattern in isolation. - if (lowerToFilterOuterProduct) { - VectorContractLowering lowering = VectorContractLowering::OuterProduct; - VectorTransformsOptions options{lowering}; - patterns.add( - options, &getContext(), /*benefit=*/1, [](vector::ContractionOp op) { - // Only lowers vector.contract where the lhs as a type vector - // where M is not 4. 
- if (op.getRhsType().getShape()[0] == 4) - return failure(); - return success(); - }); + populateVectorContractLoweringPatterns( + patterns, options, /*benefit=*/1, + /*disableOuterProductlowering=*/true); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); return; } @@ -490,7 +471,7 @@ struct TestVectorTransferFullPartialSplitPatterns options.setVectorTransferSplit(VectorTransferSplit::LinalgCopy); else options.setVectorTransferSplit(VectorTransferSplit::VectorTransfer); - patterns.add(ctx, options); + populateVectorTransferFullPartialPatterns(patterns, options); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } }; diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp index cc55b96d5b1a8..6beb9384c8bf2 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp +++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp @@ -594,12 +594,12 @@ TEST(IntegerPolyhedronTest, removeRedundantConstraintsTest) { TEST(IntegerPolyhedronTest, addConstantUpperBound) { IntegerPolyhedron poly(PresburgerSpace::getSetSpace(2)); - poly.addBound(IntegerPolyhedron::UB, 0, 1); + poly.addBound(BoundType::UB, 0, 1); EXPECT_EQ(poly.atIneq(0, 0), -1); EXPECT_EQ(poly.atIneq(0, 1), 0); EXPECT_EQ(poly.atIneq(0, 2), 1); - poly.addBound(IntegerPolyhedron::UB, {1, 2, 3}, 1); + poly.addBound(BoundType::UB, {1, 2, 3}, 1); EXPECT_EQ(poly.atIneq(1, 0), -1); EXPECT_EQ(poly.atIneq(1, 1), -2); EXPECT_EQ(poly.atIneq(1, 2), -2); @@ -607,12 +607,12 @@ TEST(IntegerPolyhedronTest, addConstantUpperBound) { TEST(IntegerPolyhedronTest, addConstantLowerBound) { IntegerPolyhedron poly(PresburgerSpace::getSetSpace(2)); - poly.addBound(IntegerPolyhedron::LB, 0, 1); + poly.addBound(BoundType::LB, 0, 1); EXPECT_EQ(poly.atIneq(0, 0), 1); EXPECT_EQ(poly.atIneq(0, 1), 0); EXPECT_EQ(poly.atIneq(0, 2), -1); - poly.addBound(IntegerPolyhedron::LB, {1, 2, 3}, 1); + poly.addBound(BoundType::LB, {1, 2, 3}, 1); EXPECT_EQ(poly.atIneq(1, 0), 1); EXPECT_EQ(poly.atIneq(1, 1), 2); EXPECT_EQ(poly.atIneq(1, 2), 2); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 09c8e808db463..22ab7436f75b5 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -134,7 +134,7 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) { if (Event) destroyEvent(Event); HDTTMap->erase(It); - return OFFLOAD_SUCCESS; + return notifyDataUnmapped(HstPtrBegin); } REPORT("Trying to disassociate a pointer which was not mapped via " diff --git a/openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp b/openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp new file mode 100644 index 0000000000000..14cf92a7cc26e --- /dev/null +++ b/openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compileoptxx-run-and-check-generic + +#include +#include + +#pragma omp declare target +class A { +public: + constexpr static double pi = 3.141592653589793116; + A() { ; } + ~A() { ; } +}; +#pragma omp end declare target + +#pragma omp declare target +constexpr static double anotherPi = 3.14; +#pragma omp end declare target + +int main() { + double a[2]; +#pragma omp target map(tofrom : a[:2]) + { + a[0] = A::pi; + a[1] = anotherPi; + } + + // CHECK: pi = 3.141592653589793116 + printf("pi = %.18f\n", a[0]); + + // CHECK: anotherPi = 3.14 + printf("anotherPi = %.2f\n", a[1]); + + return 0; +} diff --git 
a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 55064fba0bf88..eff9752b785f5 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -569,28 +569,18 @@ cc_library( ) cc_library( - name = "tblgen", - alwayslink = True, + name = "LLVMTableGenGlobalISel", srcs = glob([ - "utils/TableGen/*.cpp", - "utils/TableGen/*.inc", "utils/TableGen/GlobalISel/*.cpp", - - # Some tablegen sources include headers from MC, so these have to be - # listed here. MC uses headers produced by tablegen, so it cannot be a - # regular dependency. - "include/llvm/MC/*.h", - + ]) + [ + "utils/TableGen/CodeGenInstruction.h", + ], + hdrs = glob([ # We have to include these headers here as well as in the `hdrs` below # to allow the `.cpp` files to use file-relative-inclusion to find # them, even though consumers of this library use inclusion relative to # `utils/TableGen` with the `strip_includes_prefix` of this library. # This mixture appears to be incompatible with header modules. - "utils/TableGen/*.h", - "utils/TableGen/GlobalISel/*.h", - ]), - hdrs = glob([ - "utils/TableGen/*.h", "utils/TableGen/GlobalISel/*.h", ]), copts = llvm_copts, @@ -605,10 +595,23 @@ cc_library( cc_binary( name = "llvm-tblgen", + srcs = glob([ + "utils/TableGen/*.cpp", + "utils/TableGen/*.inc", + "utils/TableGen/*.h", + + # Some tablegen sources include headers from MC, so these have to be + # listed here. MC uses headers produced by tablegen, so it cannot be a + # regular dependency. + "include/llvm/MC/*.h", + ]), copts = llvm_copts, stamp = 0, deps = [ - ":tblgen", + ":LLVMTableGenGlobalISel", + ":Support", + ":TableGen", + ":config", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel index a37041af5e8dc..4996a0ac93ab7 100644 --- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel @@ -675,7 +675,7 @@ cc_test( ":automata_tables_gen", "//llvm:Support", "//llvm:TableGen", - "//llvm:tblgen", + "//llvm:LLVMTableGenGlobalISel", "//third-party/unittest:gmock", "//third-party/unittest:gtest", "//third-party/unittest:gtest_main", diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 4071d92641839..f565030d63d9f 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5607,6 +5607,7 @@ cc_library( deps = [ ":AffineDialect", ":ArithDialect", + ":ArithUtils", ":DialectUtils", ":TensorDialect", "//llvm:Support", @@ -5663,6 +5664,7 @@ cc_library( ":TensorPassIncGen", ":TilingInterface", ":Transforms", + ":VectorDialect", "//llvm:Support", ], ) @@ -8537,6 +8539,7 @@ cc_library( ":TransformDialect", ":TransformDialectUtils", ":TransformUtils", + ":VectorTransforms", "//llvm:Support", ], )