diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 72923419e0b2f..b0c91c6973126 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -55,3 +55,6 @@ dd3c26a045c081620375a878159f536758baba6e f98ee40f4b5d7474fc67e82824bf6abbaedb7b1c 2238dcc39358353cac21df75c3c3286ab20b8f53 f9008e6366c2496b1ca1785b891d5578174ad63e + +# [libc++][NFC] Apply clang-format on large parts of the code base +5aa03b648b827128d439f705cd7d57d59673741d diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h index 6890ced83b13b..078b33bc9c5ab 100644 --- a/bolt/include/bolt/Core/BinarySection.h +++ b/bolt/include/bolt/Core/BinarySection.h @@ -59,7 +59,7 @@ class BinarySection { // Relocations associated with this section. Relocation offsets are // wrt. to the original section address and size. - using RelocationSetType = std::set>; + using RelocationSetType = std::multiset>; RelocationSetType Relocations; // Dynamic relocations associated with this section. Relocation offsets are @@ -345,7 +345,8 @@ class BinarySection { bool removeRelocationAt(uint64_t Offset) { auto Itr = Relocations.find(Offset); if (Itr != Relocations.end()) { - Relocations.erase(Itr); + auto End = Relocations.upper_bound(Offset); + Relocations.erase(Itr, End); return true; } return false; diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h index 1296c001db57c..5ae288a91986e 100644 --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -14,10 +14,11 @@ #ifndef BOLT_CORE_RELOCATION_H #define BOLT_CORE_RELOCATION_H +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/TargetParser/Triple.h" namespace llvm { -class MCStreamer; class MCSymbol; class raw_ostream; @@ -122,8 +123,36 @@ struct Relocation { /// responsible for setting the position correctly. size_t emit(MCStreamer *Streamer) const; + /// Emit a group of composed relocations. All relocations must have the same + /// offset. If std::distance(Begin, End) == 1, this is equivalent to + /// Begin->emit(Streamer). + template + static size_t emit(RelocIt Begin, RelocIt End, MCStreamer *Streamer) { + if (Begin == End) + return 0; + + const MCExpr *Value = nullptr; + + for (auto RI = Begin; RI != End; ++RI) { + assert(RI->Offset == Begin->Offset && + "emitting composed relocations with different offsets"); + Value = RI->createExpr(Streamer, Value); + } + + assert(Value && "failed to create relocation value"); + auto Size = std::prev(End)->getSize(); + Streamer->emitValue(Value, Size); + return Size; + } + /// Print a relocation to \p OS. void print(raw_ostream &OS) const; + +private: + const MCExpr *createExpr(MCStreamer *Streamer) const; + const MCExpr *createExpr(MCStreamer *Streamer, + const MCExpr *RetainedValue) const; + static MCBinaryExpr::Opcode getComposeOpcodeFor(uint64_t Type); }; /// Relocation ordering by offset. diff --git a/bolt/lib/Core/BinarySection.cpp b/bolt/lib/Core/BinarySection.cpp index e0e7b15d04a57..6742302a98e67 100644 --- a/bolt/lib/Core/BinarySection.cpp +++ b/bolt/lib/Core/BinarySection.cpp @@ -90,23 +90,46 @@ void BinarySection::emitAsData(MCStreamer &Streamer, Streamer.emitBytes(SectionContents); } else { uint64_t SectionOffset = 0; - for (const Relocation &Relocation : relocations()) { - assert(Relocation.Offset < SectionContents.size() && "overflow detected"); + for (auto RI = Relocations.begin(), RE = Relocations.end(); RI != RE;) { + auto RelocationOffset = RI->Offset; + assert(RelocationOffset < SectionContents.size() && "overflow detected"); + + if (SectionOffset < RelocationOffset) { + Streamer.emitBytes(SectionContents.substr( + SectionOffset, RelocationOffset - SectionOffset)); + SectionOffset = RelocationOffset; + } + + // Get iterators to all relocations with the same offset. Usually, there + // is only one such relocation but there can be more for composed + // relocations. + auto ROI = RI; + auto ROE = Relocations.upper_bound(RelocationOffset); + + // Start from the next offset on the next iteration. + RI = ROE; + // Skip undefined symbols. - if (BC.UndefinedSymbols.count(Relocation.Symbol)) + auto HasUndefSym = [this](const auto &Relocation) { + return BC.UndefinedSymbols.count(Relocation.Symbol); + }; + + if (std::any_of(ROI, ROE, HasUndefSym)) continue; - if (SectionOffset < Relocation.Offset) { - Streamer.emitBytes(SectionContents.substr( - SectionOffset, Relocation.Offset - SectionOffset)); - SectionOffset = Relocation.Offset; + +#ifndef NDEBUG + for (const auto &Relocation : make_range(ROI, ROE)) { + LLVM_DEBUG( + dbgs() << "BOLT-DEBUG: emitting relocation for symbol " + << (Relocation.Symbol ? Relocation.Symbol->getName() + : StringRef("")) + << " at offset 0x" << Twine::utohexstr(Relocation.Offset) + << " with size " + << Relocation::getSizeForType(Relocation.Type) << '\n'); } - LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitting relocation for symbol " - << (Relocation.Symbol ? Relocation.Symbol->getName() - : StringRef("")) - << " at offset 0x" - << Twine::utohexstr(Relocation.Offset) << " with size " - << Relocation::getSizeForType(Relocation.Type) << '\n'); - size_t RelocationSize = Relocation.emit(&Streamer); +#endif + + size_t RelocationSize = Relocation::emit(ROI, ROE, &Streamer); SectionOffset += RelocationSize; } assert(SectionOffset <= SectionContents.size() && "overflow error"); @@ -221,9 +244,7 @@ BinarySection::reorderRelocations(bool Inplace) const { assert(NewRel.Offset < getSize()); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: moving " << Rel << " -> " << NewRel << "\n"); - auto Res = NewRelocations.emplace(std::move(NewRel)); - (void)Res; - assert(Res.second && "Can't overwrite existing relocation"); + NewRelocations.emplace(std::move(NewRel)); } return NewRelocations; } diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index 0901a1465193c..b02ca61964a74 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -817,39 +817,48 @@ uint64_t Relocation::getRelative() { size_t Relocation::emit(MCStreamer *Streamer) const { const size_t Size = getSizeForType(Type); + const auto *Value = createExpr(Streamer); + Streamer->emitValue(Value, Size); + return Size; +} + +const MCExpr *Relocation::createExpr(MCStreamer *Streamer) const { MCContext &Ctx = Streamer->getContext(); + const MCExpr *Value = nullptr; + + if (Symbol && Addend) { + Value = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, Ctx), + MCConstantExpr::create(Addend, Ctx), Ctx); + } else if (Symbol) { + Value = MCSymbolRefExpr::create(Symbol, Ctx); + } else { + Value = MCConstantExpr::create(Addend, Ctx); + } + if (isPCRelative(Type)) { MCSymbol *TempLabel = Ctx.createNamedTempSymbol(); Streamer->emitLabel(TempLabel); - const MCExpr *Value = nullptr; - if (Symbol) { - Value = MCSymbolRefExpr::create(Symbol, Ctx); - if (Addend) { - Value = MCBinaryExpr::createAdd( - Value, MCConstantExpr::create(Addend, Ctx), Ctx); - } - } else { - Value = MCConstantExpr::create(Addend, Ctx); - } Value = MCBinaryExpr::createSub( Value, MCSymbolRefExpr::create(TempLabel, Ctx), Ctx); - Streamer->emitValue(Value, Size); - - return Size; } - if (Symbol && Addend) { - auto Value = - MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Symbol, Ctx), - MCConstantExpr::create(Addend, Ctx), Ctx); - Streamer->emitValue(Value, Size); - } else if (Symbol) { - Streamer->emitSymbolValue(Symbol, Size); - } else { - Streamer->emitIntValue(Addend, Size); + return Value; +} + +const MCExpr *Relocation::createExpr(MCStreamer *Streamer, + const MCExpr *RetainedValue) const { + const auto *Value = createExpr(Streamer); + + if (RetainedValue) { + Value = MCBinaryExpr::create(getComposeOpcodeFor(Type), RetainedValue, + Value, Streamer->getContext()); } - return Size; + return Value; +} + +MCBinaryExpr::Opcode Relocation::getComposeOpcodeFor(uint64_t Type) { + llvm_unreachable("not implemented"); } #define ELF_RELOC(name, value) #name, diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp index fc392fec36865..8cfda506fc254 100644 --- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp @@ -214,6 +214,10 @@ class ASTWalker : public RecursiveASTVisitor { return true; } bool VisitVarDecl(VarDecl *VD) { + // Ignore the parameter decl itself (its children were handled elsewhere), + // as they don't contribute to the main-file #include. + if (llvm::isa(VD)) + return true; // Mark declaration from definition as it needs type-checking. if (VD->isThisDeclarationADefinition()) report(VD->getLocation(), VD); diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp index 918f7c968ef90..008da47164092 100644 --- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp +++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp @@ -108,10 +108,18 @@ class Action : public clang::ASTFrontendAction { } void EndSourceFile() override { + const auto &SM = getCompilerInstance().getSourceManager(); + if (SM.getDiagnostics().hasUncompilableErrorOccurred()) { + llvm::errs() + << "Skipping file " << getCurrentFile() + << " due to compiler errors. clang-include-cleaner expects to " + "work on compilable source code.\n"; + return; + } + if (!HTMLReportPath.empty()) writeHTML(); - const auto &SM = getCompilerInstance().getSourceManager(); auto &HS = getCompilerInstance().getPreprocessor().getHeaderSearchInfo(); llvm::StringRef Path = SM.getFileEntryForID(SM.getMainFileID())->tryGetRealPathName(); diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index bad55e1433549..5b5f77b5fdea8 100644 --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -91,7 +91,8 @@ TEST_F(WalkUsedTest, Basic) { #include "header.h" #include "private.h" - void $bar^bar($private^Private $p^p) { + // No reference reported for the Parameter "p". + void $bar^bar($private^Private p) { $foo^foo(); std::$vector^vector $vconstructor^$v^v; $builtin^__builtin_popcount(1); @@ -120,7 +121,6 @@ TEST_F(WalkUsedTest, Basic) { offsetToProviders(AST, SM), UnorderedElementsAre( Pair(Code.point("bar"), UnorderedElementsAre(MainFile)), - Pair(Code.point("p"), UnorderedElementsAre(MainFile)), Pair(Code.point("private"), UnorderedElementsAre(PublicFile, PrivateFile)), Pair(Code.point("foo"), UnorderedElementsAre(HeaderFile)), diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 9944cb28f0487..ab1fe885a3a75 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -645,6 +645,10 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in rounding halfway cases to even (that is, to the nearest value that is an even integer), regardless of the current rounding direction. + T __builtin_elementwise_round(T x) round x to the nearest integer value in floating point format, floating point types + rounding halfway cases away from zero, regardless of the + current rounding direction. May raise floating-point + exceptions. T __builtin_elementwise_trunc(T x) return the integral value nearest to but no larger in floating point types magnitude than x T __builtin_elementwise_canonicalize(T x) return the platform specific canonical encoding floating point types diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6cee1c83706aa..7c6a350c7eab2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -228,6 +228,10 @@ Non-comprehensive list of changes in this release variable as a way to disable color diagnostics. - Clang now supports ``__builtin_isfpclass``, which checks if the specified floating-point value falls into any of the specified data classes. +- Added ``__builtin_elementwise_round`` for builtin for floating + point types. This allows access to ``llvm.round`` for + arbitrary floating-point and vector of floating-point types. + New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 59b61b89bd245..aa0076aab951c 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -687,6 +687,7 @@ BUILTIN(__builtin_elementwise_log, "v.", "nct") BUILTIN(__builtin_elementwise_log2, "v.", "nct") BUILTIN(__builtin_elementwise_log10, "v.", "nct") BUILTIN(__builtin_elementwise_roundeven, "v.", "nct") +BUILTIN(__builtin_elementwise_round, "v.", "nct") BUILTIN(__builtin_elementwise_sin, "v.", "nct") BUILTIN(__builtin_elementwise_trunc, "v.", "nct") BUILTIN(__builtin_elementwise_canonicalize, "v.", "nct") diff --git a/clang/include/clang/CodeGen/CodeGenAction.h b/clang/include/clang/CodeGen/CodeGenAction.h index b5721344046d0..821e80919fc84 100644 --- a/clang/include/clang/CodeGen/CodeGenAction.h +++ b/clang/include/clang/CodeGen/CodeGenAction.h @@ -53,6 +53,9 @@ class CodeGenAction : public ASTFrontendAction { std::unique_ptr loadModule(llvm::MemoryBufferRef MBRef); + /// Load bitcode modules to link into our module from the options. + bool loadLinkModules(CompilerInstance &CI); + protected: /// Create a new code generation action. If the optional \p _VMContext /// parameter is supplied, the action uses it without taking ownership, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 87df2266bd7e3..cf002e772df02 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2266,10 +2266,8 @@ defm xray_function_index : BoolFOption<"xray-function-index", " expense of single-function patching performance">>; def fxray_link_deps : Flag<["-"], "fxray-link-deps">, Group, - Flags<[CC1Option]>, - HelpText<"Tells clang to add the link dependencies for XRay.">; -def fnoxray_link_deps : Flag<["-"], "fnoxray-link-deps">, Group, - Flags<[CC1Option]>; + HelpText<"Link XRay runtime library when -fxray-instrument is specified (default)">; +def fno_xray_link_deps : Flag<["-"], "fno-xray-link-deps">, Group; def fxray_instrumentation_bundle : Joined<["-"], "fxray-instrumentation-bundle=">, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index a93eb3d38a480..7019bc5922ebc 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1099,6 +1099,8 @@ enum PredefinedTypeIDs { // \brief WebAssembly reference types with auto numeration #define WASM_TYPE(Name, Id, SingletonId) PREDEF_TYPE_##Id##_ID, #include "clang/Basic/WebAssemblyReferenceTypes.def" + // Sentinel value. Considered a predefined type but not useable as one. + PREDEF_TYPE_LAST_ID }; /// The number of predefined type IDs that are reserved for @@ -1106,7 +1108,13 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 300; +const unsigned NUM_PREDEF_TYPE_IDS = 500; + +// Ensure we do not overrun the predefined types we reserved +// in the enum PredefinedTypeIDs above. +static_assert(PREDEF_TYPE_LAST_ID < NUM_PREDEF_TYPE_IDS, + "Too many enumerators in PredefinedTypeIDs. Review the value of " + "NUM_PREDEF_TYPE_IDS"); /// Record codes for each kind of type. /// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 69438960ebf23..7a38ac4ba10cb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3194,6 +3194,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven, "elt.roundeven")); + case Builtin::BI__builtin_elementwise_round: + return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round, + "elt.round")); case Builtin::BI__builtin_elementwise_sin: return RValue::get( emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin")); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index fc77566c790c1..61029c9226d7b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1845,8 +1845,9 @@ addMergableDefaultFunctionAttributes(const CodeGenOptions &CodeGenOpts, FuncAttrs); } -void CodeGenModule::getTrivialDefaultFunctionAttributes( - StringRef Name, bool HasOptnone, bool AttrOnCallSite, +static void getTrivialDefaultFunctionAttributes( + StringRef Name, bool HasOptnone, const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts, bool AttrOnCallSite, llvm::AttrBuilder &FuncAttrs) { // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. if (!HasOptnone) { @@ -1967,7 +1968,7 @@ void CodeGenModule::getTrivialDefaultFunctionAttributes( } } - if (getLangOpts().assumeFunctionsAreConvergent()) { + if (LangOpts.assumeFunctionsAreConvergent()) { // Conservatively, mark all functions and calls in CUDA and OpenCL as // convergent (meaning, they may call an intrinsically convergent op, such // as __syncthreads() / barrier(), and so can't have certain optimizations @@ -1978,8 +1979,8 @@ void CodeGenModule::getTrivialDefaultFunctionAttributes( // TODO: NoUnwind attribute should be added for other GPU modes HIP, // OpenMP offload. AFAIK, neither of them support exceptions in device code. - if ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice) || - getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) { + if ((LangOpts.CUDA && LangOpts.CUDAIsDevice) || LangOpts.OpenCL || + LangOpts.SYCLIsDevice) { FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } @@ -1990,36 +1991,25 @@ void CodeGenModule::getTrivialDefaultFunctionAttributes( } } -void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, - bool HasOptnone, - bool AttrOnCallSite, - llvm::AttrBuilder &FuncAttrs) { - getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, - FuncAttrs); - if (!AttrOnCallSite) { - // If we're just getting the default, get the default values for mergeable - // attributes. - addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs); - } -} +/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as +/// though we had emitted it ourselves. We remove any attributes on F that +/// conflict with the attributes we add here. +static void mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, const CodeGenOptions CodeGenOpts, + const LangOptions &LangOpts, const TargetOptions &TargetOpts, + bool WillInternalize) { -void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { llvm::AttrBuilder FuncAttrs(F.getContext()); - getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), - /* AttrOnCallSite = */ false, FuncAttrs); - // TODO: call GetCPUAndFeaturesAttributes? - F.addFnAttrs(FuncAttrs); -} + // Here we only extract the options that are relevant compared to the version + // from GetCPUAndFeaturesAttributes. + if (!TargetOpts.CPU.empty()) + FuncAttrs.addAttribute("target-cpu", TargetOpts.CPU); + if (!TargetOpts.TuneCPU.empty()) + FuncAttrs.addAttribute("tune-cpu", TargetOpts.TuneCPU); -/// Apply default attributes to \p F, accounting for merge semantics of -/// attributes that should not overwrite existing attributes. -void CodeGenModule::mergeDefaultFunctionDefinitionAttributes( - llvm::Function &F, bool WillInternalize) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(), - /*AttrOnCallSite=*/false, FuncAttrs); - GetCPUAndFeaturesAttributes(GlobalDecl(), FuncAttrs, - /*AddTargetFeatures=*/false); + ::getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(), + CodeGenOpts, LangOpts, + /*AttrOnCallSite=*/false, FuncAttrs); if (!WillInternalize && F.isInterposable()) { // Do not promote "dynamic" denormal-fp-math to this translation unit's @@ -2064,6 +2054,52 @@ void CodeGenModule::mergeDefaultFunctionDefinitionAttributes( F.addFnAttrs(FuncAttrs); } +void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, const CodeGenOptions CodeGenOpts, + const LangOptions &LangOpts, const TargetOptions &TargetOpts, + bool WillInternalize) { + + ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts, + TargetOpts, WillInternalize); +} + +void CodeGenModule::getTrivialDefaultFunctionAttributes( + StringRef Name, bool HasOptnone, bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + ::getTrivialDefaultFunctionAttributes(Name, HasOptnone, getCodeGenOpts(), + getLangOpts(), AttrOnCallSite, + FuncAttrs); +} + +void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, + bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs) { + getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, + FuncAttrs); + // If we're just getting the default, get the default values for mergeable + // attributes. + if (!AttrOnCallSite) + addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs); +} + +void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { + llvm::AttrBuilder FuncAttrs(F.getContext()); + getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), + /* AttrOnCallSite = */ false, FuncAttrs); + // TODO: call GetCPUAndFeaturesAttributes? + F.addFnAttrs(FuncAttrs); +} + +/// Apply default attributes to \p F, accounting for merge semantics of +/// attributes that should not overwrite existing attributes. +void CodeGenModule::mergeDefaultFunctionDefinitionAttributes( + llvm::Function &F, bool WillInternalize) { + ::mergeDefaultFunctionDefinitionAttributes(F, getCodeGenOpts(), getLangOpts(), + getTarget().getTargetOpts(), + WillInternalize); +} + void CodeGenModule::addDefaultFunctionDefinitionAttributes( llvm::AttrBuilder &attrs) { getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false, diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index 59c3f304f59b9..824f0a9a88299 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -30,6 +30,7 @@ class Value; namespace clang { class Decl; class FunctionDecl; +class TargetOptions; class VarDecl; namespace CodeGen { @@ -377,6 +378,14 @@ class ReturnValueSlot { bool isExternallyDestructed() const { return IsExternallyDestructed; } }; +/// Helper to add attributes to \p F according to the CodeGenOptions and +/// LangOptions without requiring a CodeGenModule to be constructed. +void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F, + const CodeGenOptions CodeGenOpts, + const LangOptions &LangOpts, + const TargetOptions &TargetOpts, + bool WillInternalize); + } // end namespace CodeGen } // end namespace clang diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 2fd2227720a2a..f049a682cfed6 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -1945,27 +1945,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( ContainingType = RecordTy; } - // We're checking for deleted C++ special member functions - // [Ctors,Dtors, Copy/Move] - auto checkAttrDeleted = [&](const auto *Method) { - if (Method->getCanonicalDecl()->isDeleted()) - SPFlags |= llvm::DISubprogram::SPFlagDeleted; - }; - - switch (Method->getKind()) { - - case Decl::CXXConstructor: - case Decl::CXXDestructor: - checkAttrDeleted(Method); - break; - case Decl::CXXMethod: - if (Method->isCopyAssignmentOperator() || - Method->isMoveAssignmentOperator()) - checkAttrDeleted(Method); - break; - default: - break; - } + if (Method->getCanonicalDecl()->isDeleted()) + SPFlags |= llvm::DISubprogram::SPFlagDeleted; if (Method->isNoReturn()) Flags |= llvm::DINode::FlagNoReturn; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 4aa51e956655f..4879bcd6a42a5 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenAction.h" +#include "CGCall.h" #include "CodeGenModule.h" #include "CoverageMappingGen.h" #include "MacroPPCallbacks.h" @@ -262,7 +263,7 @@ namespace clang { } // Links each entry in LinkModules into our module. Returns true on error. - bool LinkInModules() { + bool LinkInModules(llvm::Module *M) { for (auto &LM : LinkModules) { assert(LM.Module && "LinkModule does not actually have a module"); if (LM.PropagateAttrs) @@ -271,8 +272,8 @@ namespace clang { // in LLVM IR. if (F.isIntrinsic()) continue; - Gen->CGM().mergeDefaultFunctionDefinitionAttributes(F, - LM.Internalize); + CodeGen::mergeDefaultFunctionDefinitionAttributes( + F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize); } CurLinkModule = LM.Module.get(); @@ -280,15 +281,14 @@ namespace clang { bool Err; if (LM.Internalize) { Err = Linker::linkModules( - *getModule(), std::move(LM.Module), LM.LinkFlags, + *M, std::move(LM.Module), LM.LinkFlags, [](llvm::Module &M, const llvm::StringSet<> &GVS) { internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) { return !GV.hasName() || (GVS.count(GV.getName()) == 0); }); }); } else { - Err = Linker::linkModules(*getModule(), std::move(LM.Module), - LM.LinkFlags); + Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags); } if (Err) @@ -357,7 +357,7 @@ namespace clang { } // Link each LinkModule into our module. - if (LinkInModules()) + if (LinkInModules(getModule())) return; for (auto &F : getModule()->functions()) { @@ -993,6 +993,36 @@ CodeGenAction::~CodeGenAction() { delete VMContext; } +bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { + if (!LinkModules.empty()) + return false; + + for (const CodeGenOptions::BitcodeFileToLink &F : + CI.getCodeGenOpts().LinkBitcodeFiles) { + auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); + if (!BCBuf) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << BCBuf.getError().message(); + LinkModules.clear(); + return true; + } + + Expected> ModuleOrErr = + getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); + if (!ModuleOrErr) { + handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); + }); + LinkModules.clear(); + return true; + } + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + } + return false; +} + bool CodeGenAction::hasIRSupport() const { return true; } void CodeGenAction::EndSourceFileAction() { @@ -1048,30 +1078,8 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { return nullptr; // Load bitcode modules to link with, if we need to. - if (LinkModules.empty()) - for (const CodeGenOptions::BitcodeFileToLink &F : - CI.getCodeGenOpts().LinkBitcodeFiles) { - auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); - if (!BCBuf) { - CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); - LinkModules.clear(); - return nullptr; - } - - Expected> ModuleOrErr = - getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); - if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { - CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << EIB.message(); - }); - LinkModules.clear(); - return nullptr; - } - LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, - F.Internalize, F.LinkFlags}); - } + if (loadLinkModules(CI)) + return nullptr; CoverageSourceInfo *CoverageInfo = nullptr; // Add the preprocessor callback only when the coverage mapping is generated. @@ -1139,6 +1147,10 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { return std::move(*MOrErr); } + // Load bitcode modules to link with, if we need to. + if (loadLinkModules(CI)) + return nullptr; + llvm::SMDiagnostic Err; if (std::unique_ptr M = parseIR(MBRef, Err, *VMContext)) return M; @@ -1218,6 +1230,11 @@ void CodeGenAction::ExecuteAction() { CI.getCodeGenOpts(), CI.getTargetOpts(), CI.getLangOpts(), TheModule.get(), std::move(LinkModules), *VMContext, nullptr); + + // Link in each pending link module. + if (Result.LinkInModules(&*TheModule)) + return; + // PR44896: Force DiscardValueNames as false. DiscardValueNames cannot be // true here because the valued names are needed for reading textual IR. Ctx.setDiscardValueNames(false); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 165b1f9ae1300..2d9392893bc36 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1236,11 +1236,11 @@ bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringLis return false; if (TC.getXRayArgs().needsXRayRt()) { - CmdArgs.push_back("-whole-archive"); + CmdArgs.push_back("--whole-archive"); CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray")); for (const auto &Mode : TC.getXRayArgs().modeList()) CmdArgs.push_back(TC.getCompilerRTArgString(Args, Mode)); - CmdArgs.push_back("-no-whole-archive"); + CmdArgs.push_back("--no-whole-archive"); return true; } diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index f15a91f1aba44..9a4b28576a9b9 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -74,7 +74,7 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { << XRayInstrument->getSpelling() << A->getSpelling(); if (!Args.hasFlag(options::OPT_fxray_link_deps, - options::OPT_fnoxray_link_deps, true)) + options::OPT_fno_xray_link_deps, true)) XRayRT = false; auto Bundles = diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6c04febfd192..dca8853f1c204 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1894,7 +1894,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) { if (T.isOSAIX()) { StringRef Name = A->getValue(); - if (Name != "global-dynamic") + if (Name != "global-dynamic" && Name != "local-exec") Diags.Report(diag::err_aix_unsupported_tls_model) << Name; } } diff --git a/clang/lib/Headers/__clang_hip_libdevice_declares.h b/clang/lib/Headers/__clang_hip_libdevice_declares.h index be25f4b4a0506..2fc5e17d2f1ea 100644 --- a/clang/lib/Headers/__clang_hip_libdevice_declares.h +++ b/clang/lib/Headers/__clang_hip_libdevice_declares.h @@ -137,23 +137,6 @@ __device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float); - -__device__ inline __attribute__((const)) float -__llvm_amdgcn_cos_f32(float __x) { - return __builtin_amdgcn_cosf(__x); -} -__device__ inline __attribute__((const)) float -__llvm_amdgcn_rcp_f32(float __x) { - return __builtin_amdgcn_rcpf(__x); -} -__device__ inline __attribute__((const)) float -__llvm_amdgcn_rsq_f32(float __x) { - return __builtin_amdgcn_rsqf(__x); -} -__device__ inline __attribute__((const)) float -__llvm_amdgcn_sin_f32(float __x) { - return __builtin_amdgcn_sinf(__x); -} // END INTRINSICS // END FLOAT @@ -277,15 +260,6 @@ __device__ __attribute__((const)) double __ocml_fma_rtp_f64(double, double, __device__ __attribute__((const)) double __ocml_fma_rtz_f64(double, double, double); -__device__ inline __attribute__((const)) double -__llvm_amdgcn_rcp_f64(double __x) { - return __builtin_amdgcn_rcp(__x); -} -__device__ inline __attribute__((const)) double -__llvm_amdgcn_rsq_f64(double __x) { - return __builtin_amdgcn_rsq(__x); -} - __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); __device__ _Float16 __ocml_cos_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float); @@ -305,7 +279,6 @@ __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16); -__device__ __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_rint_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); __device__ _Float16 __ocml_sin_f16(_Float16); @@ -332,11 +305,6 @@ __device__ __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); -__device__ inline __2f16 -__llvm_amdgcn_rcp_2f16(__2f16 __x) // Not currently exposed by ROCDL. -{ - return (__2f16)(__llvm_amdgcn_rcp_f16(__x.x), __llvm_amdgcn_rcp_f16(__x.y)); -} __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); __device__ __2f16 __ocml_sin_2f16(__2f16); diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index c19e32bd29364..2b33efcd8317a 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -268,7 +268,7 @@ __DEVICE__ int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } __DEVICE__ -__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); } +__RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); } __DEVICE__ __RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); } @@ -647,7 +647,7 @@ float __frcp_rn(float __x) { return 1.0f / __x; } #endif __DEVICE__ -float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } +float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ @@ -817,7 +817,7 @@ __DEVICE__ int ilogb(double __x) { return __ocml_ilogb_f64(__x); } __DEVICE__ -__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); } +__RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); } __DEVICE__ __RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); } diff --git a/clang/lib/Index/IndexDecl.cpp b/clang/lib/Index/IndexDecl.cpp index 882e02836d4fb..1c04aa17d53fb 100644 --- a/clang/lib/Index/IndexDecl.cpp +++ b/clang/lib/Index/IndexDecl.cpp @@ -705,6 +705,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { IndexCtx.handleReference(C->getNamedConcept(), C->getConceptNameLoc(), Parent, TTP->getLexicalDeclContext()); } else if (const auto *NTTP = dyn_cast(TP)) { + IndexCtx.indexTypeSourceInfo(NTTP->getTypeSourceInfo(), Parent); if (NTTP->hasDefaultArgument()) IndexCtx.indexBody(NTTP->getDefaultArgument(), Parent); } else if (const auto *TTPD = dyn_cast(TP)) { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d3b37c1fa70a2..72b2f1d1edd82 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2636,6 +2636,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_log2: case Builtin::BI__builtin_elementwise_log10: case Builtin::BI__builtin_elementwise_roundeven: + case Builtin::BI__builtin_elementwise_round: case Builtin::BI__builtin_elementwise_sin: case Builtin::BI__builtin_elementwise_trunc: case Builtin::BI__builtin_elementwise_canonicalize: { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 40438214d2b46..d9146a89d322e 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2039,7 +2039,7 @@ static void handleTLSModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } if (S.Context.getTargetInfo().getTriple().isOSAIX() && - Model != "global-dynamic") { + Model != "global-dynamic" && Model != "local-exec") { S.Diag(LiteralLoc, diag::err_aix_attr_unsupported_tls_model) << Model; return; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a0ccc5aa4a741..cba6791783e8b 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6983,6 +6983,10 @@ QualType ASTReader::GetType(TypeID ID) { if (Index < NUM_PREDEF_TYPE_IDS) { QualType T; switch ((PredefinedTypeIDs)Index) { + case PREDEF_TYPE_LAST_ID: + // We should never use this one. + llvm_unreachable("Invalid predefined type"); + break; case PREDEF_TYPE_NULL_ID: return QualType(); case PREDEF_TYPE_VOID_ID: diff --git a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp index a531f558ac796..2b53df54a5e3c 100644 --- a/clang/test/CodeGen/PowerPC/aix-tls-model.cpp +++ b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp @@ -2,12 +2,12 @@ // RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD // RUN: not %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR // RUN: not %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-IE-ERROR -// RUN: not %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LE-ERROR +// RUN: %clang_cc1 %s -triple powerpc-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD // RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=global-dynamic -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-GD // RUN: not %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-dynamic -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LD-ERROR // RUN: not %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=initial-exec -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-IE-ERROR -// RUN: not %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-LE-ERROR +// RUN: %clang_cc1 %s -triple powerpc64-unknown-aix -target-cpu pwr8 -ftls-model=local-exec -emit-llvm -o - | FileCheck %s -check-prefix=CHECK-LE int z1 = 0; int z2; @@ -23,4 +23,7 @@ int f() { // CHECK-GD: @_ZZ1fvE1y = internal thread_local global i32 0 // CHECK-LD-ERROR: error: TLS model 'local-dynamic' is not yet supported on AIX // CHECK-IE-ERROR: error: TLS model 'initial-exec' is not yet supported on AIX -// CHECK-LE-ERROR: error: TLS model 'local-exec' is not yet supported on AIX +// CHECK-LE: @z1 ={{.*}} global i32 0 +// CHECK-LE: @z2 ={{.*}} global i32 0 +// CHECK-LE: @x ={{.*}} thread_local(localexec) global i32 0 +// CHECK-LE: @_ZZ1fvE1y = internal thread_local(localexec) global i32 0 diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index deb518b1ba597..4598faaa84163 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -468,6 +468,22 @@ void test_builtin_elementwise_roundeven(float f1, float f2, double d1, double d2 vf2 = __builtin_elementwise_roundeven(vf1); } +void test_builtin_elementwise_round(float f1, float f2, double d1, double d2, + float4 vf1, float4 vf2) { + // CHECK-LABEL: define void @test_builtin_elementwise_round( + // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4 + // CHECK-NEXT: call float @llvm.round.f32(float [[F1]]) + f2 = __builtin_elementwise_round(f1); + + // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8 + // CHECK-NEXT: call double @llvm.round.f64(double [[D1]]) + d2 = __builtin_elementwise_round(d1); + + // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 + // CHECK-NEXT: call <4 x float> @llvm.round.v4f32(<4 x float> [[VF1]]) + vf2 = __builtin_elementwise_round(vf1); +} + void test_builtin_elementwise_sin(float f1, float f2, double d1, double d2, float4 vf1, float4 vf2) { // CHECK-LABEL: define void @test_builtin_elementwise_sin( diff --git a/clang/test/CodeGen/builtins-nondeterministic-value.c b/clang/test/CodeGen/builtins-nondeterministic-value.c index aa040edf73d98..cc12f95997f04 100644 --- a/clang/test/CodeGen/builtins-nondeterministic-value.c +++ b/clang/test/CodeGen/builtins-nondeterministic-value.c @@ -5,8 +5,8 @@ typedef _Bool bool4 __attribute__((ext_vector_type(4))); int clang_nondet_i( int x ) { // CHECK-LABEL: entry -// CHECK: [[A:%.*]] = alloca i32, align 4 -// CHECK: store i32 [[X:%.*]], ptr [[A]], align 4 +// CHECK: [[A:%.*]] = alloca i32 +// CHECK: store i32 [[X:%.*]], ptr [[A]] // CHECK: [[R:%.*]] = freeze i32 poison // CHECK: ret i32 [[R]] return __builtin_nondeterministic_value(x); @@ -14,8 +14,8 @@ int clang_nondet_i( int x ) { float clang_nondet_f( float x ) { // CHECK-LABEL: entry -// CHECK: [[A:%.*]] = alloca float, align 4 -// CHECK: store float [[X:%.*]], ptr [[A]], align 4 +// CHECK: [[A:%.*]] = alloca float +// CHECK: store float [[X:%.*]], ptr [[A]] // CHECK: [[R:%.*]] = freeze float poison // CHECK: ret float [[R]] return __builtin_nondeterministic_value(x); @@ -23,8 +23,8 @@ float clang_nondet_f( float x ) { double clang_nondet_d( double x ) { // CHECK-LABEL: entry -// CHECK: [[A:%.*]] = alloca double, align 8 -// CHECK: store double [[X:%.*]], ptr [[A]], align 8 +// CHECK: [[A:%.*]] = alloca double +// CHECK: store double [[X:%.*]], ptr [[A]] // CHECK: [[R:%.*]] = freeze double poison // CHECK: ret double [[R]] return __builtin_nondeterministic_value(x); @@ -32,9 +32,9 @@ double clang_nondet_d( double x ) { _Bool clang_nondet_b( _Bool x) { // CHECK-LABEL: entry -// CHECK: [[A:%.*]] = alloca i8, align 1 +// CHECK: [[A:%.*]] = alloca i8 // CHECK: [[B:%.*]] = zext i1 %x to i8 -// CHECK: store i8 [[B]], ptr [[A]], align 1 +// CHECK: store i8 [[B]], ptr [[A]] // CHECK: [[R:%.*]] = freeze i1 poison // CHECK: ret i1 [[R]] return __builtin_nondeterministic_value(x); @@ -42,19 +42,19 @@ _Bool clang_nondet_b( _Bool x) { void clang_nondet_fv( ) { // CHECK-LABEL: entry -// CHECK: [[A:%.*]] = alloca <4 x float>, align +// CHECK: [[A:%.*]] = alloca <4 x float> // CHECK: [[R:%.*]] = freeze <4 x float> poison -// CHECK: store <4 x float> [[R]], ptr [[A]], align +// CHECK: store <4 x float> [[R]], ptr [[A]] // CHECK: ret void float4 x = __builtin_nondeterministic_value(x); } void clang_nondet_bv( ) { -// CHECK: [[A:%.*]] = alloca i8, align +// CHECK: [[A:%.*]] = alloca i8 // CHECK: [[V:%.*]] = freeze <4 x i1> poison // CHECK: [[SV:%.*]] = shufflevector <4 x i1> [[V]], <4 x i1> poison, <8 x i32> // CHECK: [[BC:%.*]] = bitcast <8 x i1> [[SV]] to i8 -// CHECK: store i8 [[BC]], ptr [[A]], align +// CHECK: store i8 [[BC]], ptr [[A]] // CHECK: ret void bool4 x = __builtin_nondeterministic_value(x); } diff --git a/clang/test/CodeGen/link-bitcode-file.c b/clang/test/CodeGen/link-bitcode-file.c index df04ec2bec0d9..58fee64a95138 100644 --- a/clang/test/CodeGen/link-bitcode-file.c +++ b/clang/test/CodeGen/link-bitcode-file.c @@ -11,6 +11,14 @@ // RUN: not %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file no-such-file.bc \ // RUN: -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s +// Make sure we can perform the same options if the input is LLVM-IR +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm-bc -o %t-in.bc %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -mlink-bitcode-file %t.bc \ +// RUN: -O3 -emit-llvm -o - %t-in.bc | FileCheck -check-prefix=CHECK-NO-BC %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -O3 -emit-llvm -o - \ +// RUN: -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %t-in.bc \ +// RUN: | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s + int f(void); #ifdef BITCODE diff --git a/clang/test/CodeGen/link-builtin-bitcode.c b/clang/test/CodeGen/link-builtin-bitcode.c new file mode 100644 index 0000000000000..b3b54badf3f82 --- /dev/null +++ b/clang/test/CodeGen/link-builtin-bitcode.c @@ -0,0 +1,42 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs --version 2 +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx803 -DBITCODE -emit-llvm-bc -o %t-lib.bc %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm-bc -o %t.bc %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -emit-llvm \ +// RUN: -mlink-builtin-bitcode %t-lib.bc -o - %t.bc | FileCheck %s + +#ifdef BITCODE +int foo(void) { return 42; } +int x = 12; +#endif + +extern int foo(void); +extern int x; + +int bar() { return foo() + x; } +//. +// CHECK: @x = internal addrspace(1) global i32 12, align 4 +//. +// CHECK: Function Attrs: noinline nounwind optnone +// CHECK-LABEL: define dso_local i32 @bar +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[CALL:%.*]] = call i32 @foo() +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @x to ptr), align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP0]] +// CHECK-NEXT: ret i32 [[ADD]] +// +// +// CHECK: Function Attrs: convergent noinline nounwind optnone +// CHECK-LABEL: define internal i32 @foo +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: ret i32 42 +// +//. +// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +// CHECK: attributes #1 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } +//. diff --git a/clang/test/CodeGen/xray-always-emit-customevent.cpp b/clang/test/CodeGen/xray-always-emit-customevent.cpp index 63d4b91f81908..289111c90ff0a 100644 --- a/clang/test/CodeGen/xray-always-emit-customevent.cpp +++ b/clang/test/CodeGen/xray-always-emit-customevent.cpp @@ -6,5 +6,5 @@ [[clang::xray_never_instrument]] void neverInstrument() { static constexpr char kPhase[] = "never"; __xray_customevent(kPhase, 5); - // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i32 5) + // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i64 5) } diff --git a/clang/test/CodeGen/xray-always-emit-typedevent.cpp b/clang/test/CodeGen/xray-always-emit-typedevent.cpp index 75af669abe28d..745c63f6b3fa5 100644 --- a/clang/test/CodeGen/xray-always-emit-typedevent.cpp +++ b/clang/test/CodeGen/xray-always-emit-typedevent.cpp @@ -6,5 +6,5 @@ [[clang::xray_never_instrument]] void neverInstrument() { static constexpr char kPhase[] = "never"; __xray_typedevent(1, kPhase, 5); - // CHECK: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 5) + // CHECK: call void @llvm.xray.typedevent(i64 {{.*}}, ptr{{.*}}, i64 5) } diff --git a/clang/test/CodeGen/xray-customevent.cpp b/clang/test/CodeGen/xray-customevent.cpp index efaa26d633109..c64cbbd7c4236 100644 --- a/clang/test/CodeGen/xray-customevent.cpp +++ b/clang/test/CodeGen/xray-customevent.cpp @@ -4,14 +4,14 @@ [[clang::xray_always_instrument]] void alwaysInstrument() { static constexpr char kPhase[] = "instrument"; __xray_customevent(kPhase, 10); - // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i32 10) + // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i64 10) } // CHECK-LABEL: @_Z15neverInstrumentv [[clang::xray_never_instrument]] void neverInstrument() { static constexpr char kPhase[] = "never"; __xray_customevent(kPhase, 5); - // CHECK-NOT: call void @llvm.xray.customevent(ptr{{.*}}, i32 5) + // CHECK-NOT: call void @llvm.xray.customevent( } // CHECK-LABEL: @_Z21conditionalInstrumenti @@ -23,6 +23,6 @@ else __xray_customevent(kUntrue, 6); - // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i32 4) - // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i32 6) + // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i64 4) + // CHECK: call void @llvm.xray.customevent(ptr{{.*}}, i64 6) } diff --git a/clang/test/CodeGen/xray-instrumentation-bundles.cpp b/clang/test/CodeGen/xray-instrumentation-bundles.cpp index 83ca42ac6d9e9..7b9ccb25a3b52 100644 --- a/clang/test/CodeGen/xray-instrumentation-bundles.cpp +++ b/clang/test/CodeGen/xray-instrumentation-bundles.cpp @@ -52,10 +52,10 @@ static constexpr char kPhase[] = "always"; __xray_customevent(kPhase, 6); __xray_typedevent(1, kPhase, 6); - // CUSTOM: call void @llvm.xray.customevent(ptr{{.*}}, i32 6) - // NOCUSTOM-NOT: call void @llvm.xray.customevent(ptr{{.*}}, i32 6) - // TYPED: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 6) - // NOTYPED-NOT: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 6) + // CUSTOM: call void @llvm.xray.customevent(ptr {{.*}}, i64 6) + // NOCUSTOM-NOT: call void @llvm.xray.customevent( + // TYPED: call void @llvm.xray.typedevent(i64 {{.*}}, ptr{{.*}}, i64 6) + // NOTYPED-NOT: call void @llvm.xray.typedevent( } // FUNCTION: attributes #[[ALWAYSATTR]] = {{.*}} "function-instrument"="xray-always" {{.*}} diff --git a/clang/test/CodeGen/xray-typedevent.cpp b/clang/test/CodeGen/xray-typedevent.cpp index 57597970cd9e8..2ea4f7c75a7f5 100644 --- a/clang/test/CodeGen/xray-typedevent.cpp +++ b/clang/test/CodeGen/xray-typedevent.cpp @@ -7,7 +7,7 @@ auto EventType = 1; static constexpr char kPhase[] = "instrument"; __xray_typedevent(EventType, kPhase, 10); - // CHECK: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 10) + // CHECK: call void @llvm.xray.typedevent(i64 {{.*}}, ptr{{.*}}, i64 10) } // CHECK-LABEL: @_Z15neverInstrumentv @@ -15,7 +15,7 @@ auto EventType = 2; static constexpr char kPhase[] = "never"; __xray_typedevent(EventType, kPhase, 5); - // CHECK-NOT: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 5) + // CHECK-NOT: call void @llvm.xray.typedevent(i64 {{.*}}, ptr{{.*}}, i64 5) } // CHECK-LABEL: @_Z21conditionalInstrumenti @@ -29,6 +29,6 @@ else __xray_typedevent(UntrueEventType, kUntrue, 6); - // CHECK: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 4) - // CHECK: call void @llvm.xray.typedevent(i16 {{.*}}, ptr{{.*}}, i32 6) + // CHECK: call void @llvm.xray.typedevent(i64 {{.*}}, ptr{{.*}}, i64 4) + // CHECK: call void @llvm.xray.typedevent(i64 {{.*}}, ptr{{.*}}, i64 6) } diff --git a/clang/test/CodeGenCXX/debug-info-deleted.cpp b/clang/test/CodeGenCXX/debug-info-deleted.cpp index d7d0b6dba49ea..564f7fff00fb9 100644 --- a/clang/test/CodeGenCXX/debug-info-deleted.cpp +++ b/clang/test/CodeGenCXX/debug-info-deleted.cpp @@ -11,6 +11,8 @@ // ATTR: DISubprogram(name: "operator=", linkageName: "_ZN7deletedaSERKS_", {{.*}}, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagDeleted // ATTR: DISubprogram(name: "deleted", {{.*}}, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagDeleted // ATTR: DISubprogram(name: "operator=", linkageName: "_ZN7deletedaSEOS_", {{.*}}, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagDeleted +// ATTR: DISubprogram(name: "func", {{.*}}, flags: DIFlagPublic | DIFlagPrototyped | DIFlagRValueReference, spFlags: DISPFlagDeleted +// ATTR: DISubprogram(name: "bar", {{.*}}, flags: DIFlagPublic | DIFlagPrototyped | DIFlagStaticMember, spFlags: DISPFlagDeleted // ATTR: DISubprogram(name: "~deleted", {{.*}}, flags: DIFlagPublic | DIFlagPrototyped, class deleted { public: @@ -23,6 +25,9 @@ class deleted { deleted(deleted &&) = delete; deleted &operator=(deleted &&) = delete; + void func() && = delete; + static int bar() = delete; + ~deleted() = default; }; diff --git a/clang/test/Driver/XRay/xray-nolinkdeps.cpp b/clang/test/Driver/XRay/xray-nolinkdeps.cpp index 5461fc325a242..0c42c1af4f926 100644 --- a/clang/test/Driver/XRay/xray-nolinkdeps.cpp +++ b/clang/test/Driver/XRay/xray-nolinkdeps.cpp @@ -1,8 +1,6 @@ -// RUN: %clang -v -o /dev/null -fxray-instrument -fnoxray-link-deps %s -### \ +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument -fxray-link-deps -fno-xray-link-deps %s \ // RUN: 2>&1 | FileCheck --check-prefix DISABLE %s -// RUN: %clang -v -o /dev/null -fxray-instrument -fxray-link-deps %s -### \ +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument %s \ // RUN: 2>&1 | FileCheck --check-prefix ENABLE %s -// ENABLE: clang_rt.xray +// ENABLE: "--whole-archive" "{{.*}}clang_rt.xray{{.*}}"--no-whole-archive" // DISABLE-NOT: clang_rt.xray -// REQUIRES: linux || freebsd -// REQUIRES: amd64 || x86_64 || x86_64h || arm || aarch64 || arm64 diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 97058a96465f8..140cc72886692 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -130,3 +130,12 @@ // RUN: -o a.out 2>&1 | FileCheck %s --check-prefix=MISSING-LIBRARY // MISSING-LIBRARY: error: unable to find library -ldummy + +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \ +// RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND + +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -O2 -Wl,--no-undefined {{.*}}.bc diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 7169614112d18..bb96aeaa1ab9b 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -1155,17 +1155,14 @@ extern "C" __device__ int test_ilogb(double x) { // DEFAULT-LABEL: @test___finitef( // DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call i32 @__ocml_isfinite_f32(float noundef [[X:%.*]]) #[[ATTR14]] -// DEFAULT-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[CALL_I]], 0 -// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL_I]] to i32 +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.fabs.f32(float [[X:%.*]]) #[[ATTR17:[0-9]+]] +// DEFAULT-NEXT: [[CMPINF_I:%.*]] = fcmp contract one float [[TMP0]], 0x7FF0000000000000 +// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[CMPINF_I]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // // FINITEONLY-LABEL: @test___finitef( // FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call i32 @__ocml_isfinite_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] -// FINITEONLY-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[CALL_I]], 0 -// FINITEONLY-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL_I]] to i32 -// FINITEONLY-NEXT: ret i32 [[CONV]] +// FINITEONLY-NEXT: ret i32 1 // extern "C" __device__ BOOL_TYPE test___finitef(float x) { return __finitef(x); @@ -1173,17 +1170,14 @@ extern "C" __device__ BOOL_TYPE test___finitef(float x) { // DEFAULT-LABEL: @test___finite( // DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[CALL_I:%.*]] = tail call i32 @__ocml_isfinite_f64(double noundef [[X:%.*]]) #[[ATTR14]] -// DEFAULT-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[CALL_I]], 0 -// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL_I]] to i32 +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.fabs.f64(double [[X:%.*]]) #[[ATTR17]] +// DEFAULT-NEXT: [[CMPINF_I:%.*]] = fcmp contract one double [[TMP0]], 0x7FF0000000000000 +// DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[CMPINF_I]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] // // FINITEONLY-LABEL: @test___finite( // FINITEONLY-NEXT: entry: -// FINITEONLY-NEXT: [[CALL_I:%.*]] = tail call i32 @__ocml_isfinite_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]] -// FINITEONLY-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[CALL_I]], 0 -// FINITEONLY-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL_I]] to i32 -// FINITEONLY-NEXT: ret i32 [[CONV]] +// FINITEONLY-NEXT: ret i32 1 // extern "C" __device__ BOOL_TYPE test___finite(double x) { return __finite(x); @@ -1191,7 +1185,7 @@ extern "C" __device__ BOOL_TYPE test___finite(double x) { // DEFAULT-LABEL: @test___isinff( // DEFAULT-NEXT: entry: -// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.fabs.f32(float [[X:%.*]]) #[[ATTR17:[0-9]+]] +// DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.fabs.f32(float [[X:%.*]]) #[[ATTR17]] // DEFAULT-NEXT: [[CMPINF_I:%.*]] = fcmp contract oeq float [[TMP0]], 0x7FF0000000000000 // DEFAULT-NEXT: [[CONV:%.*]] = zext i1 [[CMPINF_I]] to i32 // DEFAULT-NEXT: ret i32 [[CONV]] diff --git a/clang/test/Modules/embed-files-compressed.cpp b/clang/test/Modules/embed-files-compressed.cpp index ae016bc1f9630..873b3082a2fdf 100644 --- a/clang/test/Modules/embed-files-compressed.cpp +++ b/clang/test/Modules/embed-files-compressed.cpp @@ -17,7 +17,7 @@ // RUN: %clang_cc1 -fmodules -I%t -fmodules-cache-path=%t -fmodule-name=a -emit-module %t/modulemap -fmodules-embed-all-files -o %t/a.pcm // // The above embeds ~4.5MB of highly-predictable /s and \ns into the pcm file. -// Check that the resulting file is under 40KB: +// Check that the resulting file is under 60KB: // // RUN: wc -c %t/a.pcm | FileCheck --check-prefix=CHECK-SIZE %s -// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}} +// CHECK-SIZE: {{(^|[^0-9])[1-5][0-9][0-9][0-9][0-9]($|[^0-9])}} diff --git a/clang/test/Modules/empty.modulemap b/clang/test/Modules/empty.modulemap index 3225d88829ae0..f2d37c19d77bc 100644 --- a/clang/test/Modules/empty.modulemap +++ b/clang/test/Modules/empty.modulemap @@ -13,8 +13,8 @@ // The module file should be identical each time we produce it. // RUN: diff %t/base.pcm %t/check.pcm // -// We expect an empty module to be less than 40KB (and at least 10K, for now). +// We expect an empty module to be less than 60KB (and at least 10K, for now). // RUN: wc -c %t/base.pcm | FileCheck --check-prefix=CHECK-SIZE %s -// CHECK-SIZE: {{(^|[^0-9])[123][0-9][0-9][0-9][0-9]($|[^0-9])}} +// CHECK-SIZE: {{(^|[^0-9])[1-5][0-9][0-9][0-9][0-9]($|[^0-9])}} module empty { header "Inputs/empty.h" export * } diff --git a/clang/test/Sema/aix-attr-tls_model.c b/clang/test/Sema/aix-attr-tls_model.c index 8cf3086fc1aa0..245a4434d35c6 100644 --- a/clang/test/Sema/aix-attr-tls_model.c +++ b/clang/test/Sema/aix-attr-tls_model.c @@ -8,4 +8,4 @@ static __thread int y __attribute((tls_model("global-dynamic"))); // no-warning static __thread int y __attribute((tls_model("local-dynamic"))); // expected-error {{TLS model 'local-dynamic' is not yet supported on AIX}} static __thread int y __attribute((tls_model("initial-exec"))); // expected-error {{TLS model 'initial-exec' is not yet supported on AIX}} -static __thread int y __attribute((tls_model("local-exec"))); // expected-error {{TLS model 'local-exec' is not yet supported on AIX}} +static __thread int y __attribute((tls_model("local-exec"))); // no-warning diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 35b065437e54f..030c03eccd646 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -459,6 +459,32 @@ void test_builtin_elementwise_roundeven(int i, float f, double d, float4 v, int3 // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}} } +void test_builtin_elementwise_round(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) { + + struct Foo s = __builtin_elementwise_round(f); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}} + + i = __builtin_elementwise_round(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_elementwise_round(i); + // expected-error@-1 {{1st argument must be a floating point type (was 'int')}} + + i = __builtin_elementwise_round(f, f); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + u = __builtin_elementwise_round(u); + // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned int')}} + + uv = __builtin_elementwise_round(uv); + // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}} + + // FIXME: Error should not mention integer + _Complex float c1, c2; + c1 = __builtin_elementwise_round(c1); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}} +} + void test_builtin_elementwise_sin(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) { struct Foo s = __builtin_elementwise_sin(f); diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 0af0f2e371b18..c553cf86da8e3 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -427,6 +427,17 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) CmdArgs.push_back(Args.MakeArgString("-Wl," + Arg)); + for (StringRef Arg : Args.getAllArgValues(OPT_builtin_bitcode_EQ)) { + if (llvm::Triple(Arg.split('=').first) == Triple) + CmdArgs.append({"-Xclang", "-mlink-builtin-bitcode", "-Xclang", + Args.MakeArgString(Arg.split('=').second)}); + } + + // The OpenMPOpt pass can introduce new calls and is expensive, we do not want + // this when running CodeGen through clang. + if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)) + CmdArgs.append({"-mllvm", "-openmp-opt-disable"}); + if (Error Err = executeCommands(*ClangPath, CmdArgs)) return std::move(Err); @@ -629,7 +640,7 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); }); // LTO Module hook to output bitcode without running the backend. - SmallVector BitcodeOutput; + SmallVector BitcodeOutput; auto OutputBitcode = [&](size_t, const Module &M) { auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) + "-jit-" + Triple.getTriple(), @@ -648,7 +659,9 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, // We assume visibility of the whole program if every input file was bitcode. auto Features = getTargetFeatures(BitcodeInputFiles); - auto LTOBackend = Args.hasArg(OPT_embed_bitcode) + auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || + Args.hasArg(OPT_builtin_bitcode_EQ) || + Args.hasArg(OPT_clang_backend) ? createLTO(Args, Features, OutputBitcode) : createLTO(Args, Features); @@ -757,8 +770,12 @@ Error linkBitcodeFiles(SmallVectorImpl &InputFiles, return Error::success(); } - // Append the new inputs to the device linker input. - for (StringRef File : Files) + // Append the new inputs to the device linker input. If the user requested an + // internalizing link we need to pass the bitcode to clang. + for (StringRef File : + Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) + ? BitcodeOutput + : Files) OutputFiles.push_back(File); return Error::success(); diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td index 5dace9766e0c1..abab4d0b39b90 100644 --- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td +++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td @@ -25,9 +25,16 @@ def opt_level : Joined<["--"], "opt-level=">, def bitcode_library_EQ : Joined<["--"], "bitcode-library=">, Flags<[WrapperOnlyOption]>, MetaVarName<"--=">, HelpText<"Extra bitcode library to link">; +def builtin_bitcode_EQ : Joined<["--"], "builtin-bitcode=">, + Flags<[WrapperOnlyOption]>, MetaVarName<"=">, + HelpText<"Perform a special internalizing link on the bitcode file. " + "This is necessary for some vendor libraries to be linked correctly">; def device_linker_args_EQ : Joined<["--"], "device-linker=">, Flags<[WrapperOnlyOption]>, MetaVarName<" or =">, HelpText<"Arguments to pass to the device linker invocation">; +def clang_backend : Flag<["--"], "clang-backend">, + Flags<[WrapperOnlyOption]>, + HelpText<"Run the backend using clang rather than the LTO backend">; def dry_run : Flag<["--"], "dry-run">, Flags<[WrapperOnlyOption]>, HelpText<"Print program arguments without running">; diff --git a/clang/tools/libclang/CXFile.h b/clang/tools/libclang/CXFile.h index 8cb28a7ad0051..888593b7bc751 100644 --- a/clang/tools/libclang/CXFile.h +++ b/clang/tools/libclang/CXFile.h @@ -15,7 +15,8 @@ namespace clang { namespace cxfile { inline CXFile makeCXFile(OptionalFileEntryRef FE) { - return CXFile(FE ? &FE->getMapEntry() : nullptr); + return CXFile(FE ? const_cast(&FE->getMapEntry()) + : nullptr); } inline OptionalFileEntryRef getFileEntryRef(CXFile File) { diff --git a/clang/unittests/Index/IndexTests.cpp b/clang/unittests/Index/IndexTests.cpp index 88ad63b97b92a..690673a1072b0 100644 --- a/clang/unittests/Index/IndexTests.cpp +++ b/clang/unittests/Index/IndexTests.cpp @@ -392,6 +392,20 @@ TEST(IndexTest, EnumBase) { Contains(AllOf(QName("MyTypedef"), HasRole(SymbolRole::Reference), WrittenAt(Position(4, 16)))))); } + +TEST(IndexTest, NonTypeTemplateParameter) { + std::string Code = R"cpp( + enum class Foobar { foo }; + template + constexpr void func() {} + )cpp"; + auto Index = std::make_shared(); + tooling::runToolOnCode(std::make_unique(Index), Code); + EXPECT_THAT(Index->Symbols, + Contains(AllOf(QName("Foobar"), HasRole(SymbolRole::Reference), + WrittenAt(Position(3, 15))))); +} + } // namespace } // namespace index } // namespace clang diff --git a/compiler-rt/include/xray/xray_interface.h b/compiler-rt/include/xray/xray_interface.h index 410515d2373af..727431c04e4f7 100644 --- a/compiler-rt/include/xray/xray_interface.h +++ b/compiler-rt/include/xray/xray_interface.h @@ -76,8 +76,9 @@ extern int __xray_remove_customevent_handler(); /// Set a handler for xray typed event logging. The first parameter is a type /// identifier, the second is a payload, and the third is the payload size. -extern int __xray_set_typedevent_handler(void (*entry)(uint16_t, const void *, - std::size_t)); +/// NOTE: fdrLoggingHandleTypedEvent only supports uint16_t event type. +extern int __xray_set_typedevent_handler(void (*entry)(size_t, const void *, + size_t)); /// Removes the currently set typed event handler. /// Returns 1 on success, 0 on error. diff --git a/compiler-rt/lib/xray/xray_fdr_logging.cpp b/compiler-rt/lib/xray/xray_fdr_logging.cpp index 799814f437f92..6056f8134553b 100644 --- a/compiler-rt/lib/xray/xray_fdr_logging.cpp +++ b/compiler-rt/lib/xray/xray_fdr_logging.cpp @@ -580,9 +580,9 @@ void fdrLoggingHandleCustomEvent(void *Event, TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); } -void fdrLoggingHandleTypedEvent( - uint16_t EventType, const void *Event, - std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT { +void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event, + size_t EventSize) noexcept + XRAY_NEVER_INSTRUMENT { auto TC = getTimestamp(); auto &TSC = TC.TSC; auto &CPU = TC.CPU; @@ -607,7 +607,8 @@ void fdrLoggingHandleTypedEvent( return; int32_t ReducedEventSize = static_cast(EventSize); - TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize); + TLD.Controller->typedEvent(TSC, CPU, static_cast(EventType), Event, + ReducedEventSize); } XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index 73e67618c9d5f..af9f11e166ac5 100644 --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -426,8 +426,8 @@ int __xray_set_customevent_handler(void (*entry)(void *, size_t)) return 0; } -int __xray_set_typedevent_handler(void (*entry)( - uint16_t, const void *, size_t)) XRAY_NEVER_INSTRUMENT { +int __xray_set_typedevent_handler(void (*entry)(size_t, const void *, + size_t)) XRAY_NEVER_INSTRUMENT { if (atomic_load(&XRayInitialized, memory_order_acquire)) { atomic_store(&__xray::XRayPatchedTypedEvent, diff --git a/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/compiler-rt/lib/xray/xray_trampoline_AArch64.S index 3bf52cef60fed..eea56d7f0a871 100644 --- a/compiler-rt/lib/xray/xray_trampoline_AArch64.S +++ b/compiler-rt/lib/xray/xray_trampoline_AArch64.S @@ -1,163 +1,106 @@ #include "../builtins/assembly.h" +#include "../sanitizer_common/sanitizer_asm.h" - .text - /* The variable containing the handler function pointer */ - .global _ZN6__xray19XRayPatchedFunctionE - /* Word-aligned function entry point */ - .p2align 2 - /* Let C/C++ see the symbol */ - .global __xray_FunctionEntry - .hidden __xray_FunctionEntry - .type __xray_FunctionEntry, %function - /* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with - FuncId passed in W0 register. */ -__xray_FunctionEntry: +.macro SAVE_REGISTERS + stp x1, x2, [sp, #-16]! + stp x3, x4, [sp, #-16]! + stp x5, x6, [sp, #-16]! + stp x7, x30, [sp, #-16]! + stp q0, q1, [sp, #-32]! + stp q2, q3, [sp, #-32]! + stp q4, q5, [sp, #-32]! + stp q6, q7, [sp, #-32]! + // x8 is the indirect result register and needs to be preserved for the body of the function to use. + stp x8, x0, [sp, #-16]! +.endm + +.macro RESTORE_REGISTERS + ldp x8, x0, [sp], #16 + ldp q6, q7, [sp], #32 + ldp q4, q5, [sp], #32 + ldp q2, q3, [sp], #32 + ldp q0, q1, [sp], #32 + ldp x7, x30, [sp], #16 + ldp x5, x6, [sp], #16 + ldp x3, x4, [sp], #16 + ldp x1, x2, [sp], #16 +.endm + +.text +.p2align 2 +.global ASM_SYMBOL(__xray_FunctionEntry) +ASM_HIDDEN(__xray_FunctionEntry) +ASM_TYPE_FUNCTION(__xray_FunctionEntry) +ASM_SYMBOL(__xray_FunctionEntry): /* Move the return address beyond the end of sled data. The 12 bytes of data are inserted in the code of the runtime patch, between the call instruction and the instruction returned into. The data contains 32 bits of instrumented function ID and 64 bits of the address of the current trampoline. */ - ADD X30, X30, #12 - /* Push the registers which may be modified by the handler function */ - STP X1, X2, [SP, #-16]! - STP X3, X4, [SP, #-16]! - STP X5, X6, [SP, #-16]! - STP X7, X30, [SP, #-16]! - STP Q0, Q1, [SP, #-32]! - STP Q2, Q3, [SP, #-32]! - STP Q4, Q5, [SP, #-32]! - STP Q6, Q7, [SP, #-32]! - /* X8 is the indirect result register and needs to be preserved for the body - of the function to use */ - STP X8, X0, [SP, #-16]! + add x30, x30, #12 + // Push the registers which may be modified by the handler function. + SAVE_REGISTERS - /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ - ADRP X1, _ZN6__xray19XRayPatchedFunctionE - /* Load the handler function pointer into X2 */ - LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE] - /* Handler address is nullptr if handler is not set */ - CMP X2, #0 - BEQ FunctionEntry_restore - /* Function ID is already in W0 (the first parameter). - X1=0 means that we are tracing an entry event */ - MOV X1, #0 - /* Call the handler with 2 parameters in W0 and X1 */ - BLR X2 -FunctionEntry_restore: - /* Pop the saved registers */ - LDP X8, X0, [SP], #16 - LDP Q6, Q7, [SP], #32 - LDP Q4, Q5, [SP], #32 - LDP Q2, Q3, [SP], #32 - LDP Q0, Q1, [SP], #32 - LDP X7, X30, [SP], #16 - LDP X5, X6, [SP], #16 - LDP X3, X4, [SP], #16 - LDP X1, X2, [SP], #16 - RET + // Load the handler function pointer. + adrp x2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + ldr x2, [x2, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] + cbz x2, 1f + // Function ID is already in w0 (the first parameter). x1=0 means that we are tracing an entry event. + mov x1, #0 + // Call the handler with 2 parameters. + blr x2 +1: + RESTORE_REGISTERS + ret - /* Word-aligned function entry point */ - .p2align 2 - /* Let C/C++ see the symbol */ - .global __xray_FunctionExit - .hidden __xray_FunctionExit - .type __xray_FunctionExit, %function - /* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with - FuncId passed in W0 register. */ -__xray_FunctionExit: +.p2align 2 +.global ASM_SYMBOL(__xray_FunctionExit) +ASM_HIDDEN(__xray_FunctionExit) +ASM_TYPE_FUNCTION(__xray_FunctionExit) +ASM_SYMBOL(__xray_FunctionExit): /* Move the return address beyond the end of sled data. The 12 bytes of data are inserted in the code of the runtime patch, between the call instruction and the instruction returned into. The data contains 32 bits of instrumented function ID and 64 bits of the address of the current trampoline. */ - ADD X30, X30, #12 - /* Push the registers which may be modified by the handler function */ - STP X1, X2, [SP, #-16]! - STP X3, X4, [SP, #-16]! - STP X5, X6, [SP, #-16]! - STP X7, X30, [SP, #-16]! - STP Q0, Q1, [SP, #-32]! - STP Q2, Q3, [SP, #-32]! - STP Q4, Q5, [SP, #-32]! - STP Q6, Q7, [SP, #-32]! - /* X8 is the indirect result register and needs to be preserved for the body - of the function to use */ - STP X8, X0, [SP, #-16]! + add x30, x30, #12 + SAVE_REGISTERS - /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ - ADRP X1, _ZN6__xray19XRayPatchedFunctionE - /* Load the handler function pointer into X2 */ - LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE] - /* Handler address is nullptr if handler is not set */ - CMP X2, #0 - BEQ FunctionExit_restore - /* Function ID is already in W0 (the first parameter). - X1=1 means that we are tracing an exit event */ - MOV X1, #1 - /* Call the handler with 2 parameters in W0 and X1 */ - BLR X2 -FunctionExit_restore: - LDP X8, X0, [SP], #16 - LDP Q6, Q7, [SP], #32 - LDP Q4, Q5, [SP], #32 - LDP Q2, Q3, [SP], #32 - LDP Q0, Q1, [SP], #32 - LDP X7, X30, [SP], #16 - LDP X5, X6, [SP], #16 - LDP X3, X4, [SP], #16 - LDP X1, X2, [SP], #16 - RET + // Load the handler function pointer into x2. + adrp x2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + ldr x2, [x2, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] + cbz x2, 1f + // Function ID is already in w0 (the first parameter). x1=1 means that we are tracing an exit event. + mov x1, #1 + // Call the handler with 2 parameters. + blr x2 +1: + RESTORE_REGISTERS + ret - /* Word-aligned function entry point */ - .p2align 2 - /* Let C/C++ see the symbol */ - .global __xray_FunctionTailExit - .hidden __xray_FunctionTailExit - .type __xray_FunctionTailExit, %function - /* In C++ it is void extern "C" __xray_FunctionTailExit(uint32_t FuncId) - with FuncId passed in W0 register. */ -__xray_FunctionTailExit: +.p2align 2 +.global ASM_SYMBOL(__xray_FunctionTailExit) +ASM_HIDDEN(__xray_FunctionTailExit) +ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +ASM_SYMBOL(__xray_FunctionTailExit): /* Move the return address beyond the end of sled data. The 12 bytes of data are inserted in the code of the runtime patch, between the call instruction and the instruction returned into. The data contains 32 bits of instrumented function ID and 64 bits of the address of the current trampoline. */ - ADD X30, X30, #12 - /* Push the registers which may be modified by the handler function */ - STP X1, X2, [SP, #-16]! - STP X3, X4, [SP, #-16]! - STP X5, X6, [SP, #-16]! - STP X7, X30, [SP, #-16]! - /* Push the parameters of the tail called function */ - STP Q0, Q1, [SP, #-32]! - STP Q2, Q3, [SP, #-32]! - STP Q4, Q5, [SP, #-32]! - STP Q6, Q7, [SP, #-32]! - /* Load the page address of _ZN6__xray19XRayPatchedFunctionE into X1 */ - ADRP X1, _ZN6__xray19XRayPatchedFunctionE - /* Load the handler function pointer into X2 */ - LDR X2, [X1, #:lo12:_ZN6__xray19XRayPatchedFunctionE] - /* Handler address is nullptr if handler is not set */ - CMP X2, #0 - BEQ FunctionTailExit_restore - /* Function ID is already in W0 (the first parameter). - X1=2 means that we are tracing a tail exit event, but before the - logging part of XRay is ready, we pretend that here a normal function - exit happens, so we give the handler code 1 */ - MOV X1, #1 - /* Call the handler with 2 parameters in W0 and X1 */ - BLR X2 -FunctionTailExit_restore: - /* Pop the parameters of the tail called function */ - LDP Q6, Q7, [SP], #32 - LDP Q4, Q5, [SP], #32 - LDP Q2, Q3, [SP], #32 - LDP Q0, Q1, [SP], #32 - /* Pop the registers which may be modified by the handler function */ - LDP X7, X30, [SP], #16 - LDP X5, X6, [SP], #16 - LDP X3, X4, [SP], #16 - LDP X1, X2, [SP], #16 - RET + add x30, x30, #12 + // Save the registers which may be modified by the handler function. + SAVE_REGISTERS + // Load the handler function pointer into x2. + adrp x2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) + ldr x2, [x2, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] + cbz x2, 1f + // Function ID is already in w0 (the first parameter). x1=2 means that we are tracing a tail exit event. + mov x1, #2 + // Call the handler with 2 parameters. + blr x2 +1: + RESTORE_REGISTERS + ret NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp index 1bf241c1223fc..b9666a40861d4 100644 --- a/compiler-rt/lib/xray/xray_x86_64.cpp +++ b/compiler-rt/lib/xray/xray_x86_64.cpp @@ -250,10 +250,8 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { // Here we do the dance of replacing the following sled: // - // In Version 0: - // // xray_sled_n: - // jmp +20 // 2 bytes + // jmp +15 // 2 bytes // ... // // With the following: @@ -262,36 +260,17 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId, // ... // // - // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. - // - // --- - // - // In Version 1 or 2: - // - // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back - // to a jmp, use 15 bytes instead. - // + // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'. const uint64_t Address = Sled.address(); if (Enable) { std::atomic_store_explicit( reinterpret_cast *>(Address), NopwSeq, std::memory_order_release); } else { - switch (Sled.Version) { - case 1: - case 2: - std::atomic_store_explicit( - reinterpret_cast *>(Address), Jmp15Seq, - std::memory_order_release); - break; - case 0: - default: - std::atomic_store_explicit( - reinterpret_cast *>(Address), Jmp20Seq, - std::memory_order_release); - break; - } - } + std::atomic_store_explicit( + reinterpret_cast *>(Address), Jmp15Seq, + std::memory_order_release); + } return false; } diff --git a/compiler-rt/test/xray/TestCases/Posix/typed-event-logging.cpp b/compiler-rt/test/xray/TestCases/Posix/typed-event-logging.cpp new file mode 100644 index 0000000000000..103b11cfb0971 --- /dev/null +++ b/compiler-rt/test/xray/TestCases/Posix/typed-event-logging.cpp @@ -0,0 +1,36 @@ +// RUN: %clangxx_xray %s -o %t +// RUN: XRAY_OPTIONS=patch_premain=false:verbosity=1 %run %t 2>&1 | FileCheck %s + +// REQUIRES: target={{x86_64-.*linux.*}} + +#include +#include +#include "xray/xray_interface.h" + +[[clang::xray_always_instrument]] void foo() { + static constexpr char CustomLogged[] = "hello custom logging!"; + printf("before calling the custom logging...\n"); + __xray_typedevent(42, CustomLogged, sizeof(CustomLogged)); + printf("after calling the custom logging...\n"); +} + +static void myprinter(size_t type, const void *ptr, size_t size) { + assert(type == 42); + printf("%.*s\n", static_cast(size), static_cast(ptr)); +} + +int main() { + // CHECK: before calling the custom logging... + // CHECK-NEXT: after calling the custom logging... + foo(); + __xray_set_typedevent_handler(myprinter); + __xray_patch(); + // CHECK-NEXT: before calling the custom logging... + // CHECK-NEXT: hello custom logging! + // CHECK-NEXT: after calling the custom logging... + foo(); + // CHECK-NEXT: before calling the custom logging... + // CHECK-NEXT: after calling the custom logging... + __xray_remove_typedevent_handler(); + foo(); +} diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index c9d68f1e1cf90..8b05c97360607 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2606,6 +2606,7 @@ def fir_ConvertOp : fir_OneResultOp<"convert", [NoMemoryEffect]> { let hasVerifier = 1; let extraClassDeclaration = [{ + static bool isInteger(mlir::Type ty); static bool isIntegerCompatible(mlir::Type ty); static bool isFloatCompatible(mlir::Type ty); static bool isPointerCompatible(mlir::Type ty); diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 2dd85a2c5c181..beb50a48d86df 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -359,6 +359,27 @@ def hlfir_AnyOp : hlfir_Op<"any", []> { let hasVerifier = 1; } +def hlfir_CountOp : hlfir_Op<"count", [AttrSizedOperandSegments]> { + let summary = "COUNT transformational intrinsic"; + let description = [{ + Takes a logical and counts the number of true values. + }]; + + let arguments = (ins + AnyFortranLogicalArrayObject:$mask, + Optional:$dim, + Optional:$kind + ); + + let results = (outs AnyFortranValue); + + let assemblyFormat = [{ + $mask (`dim` $dim^)? (`kind` $kind^)? attr-dict `:` functional-type(operands, results) + }]; + + let hasVerifier = 1; +} + def hlfir_ProductOp : hlfir_Op<"product", [AttrSizedOperandSegments, DeclareOpInterfaceMethods]> { @@ -430,7 +451,7 @@ def hlfir_SumOp : hlfir_Op<"sum", [AttrSizedOperandSegments, let hasVerifier = 1; } -def hlifr_DotProductOp : hlfir_Op<"dot_product", +def hlfir_DotProductOp : hlfir_Op<"dot_product", [DeclareOpInterfaceMethods]> { let summary = "DOT_PRODUCT transformational intrinsic"; let description = [{ diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index ee21d63c02333..f3efbfaa2dc21 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3993,8 +3993,38 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::mapSymbolAttributes(*this, altResult, localSymbols, stmtCtx); } else { + // catch cases where the allocation for the function result storage type + // doesn't match the type of this symbol + mlir::Value preAlloc = primaryFuncResultStorage; + mlir::Type resTy = primaryFuncResultStorage.getType(); + mlir::Type symTy = genType(altResult); + mlir::Type wrappedSymTy = fir::ReferenceType::get(symTy); + if (resTy != wrappedSymTy) { + // check size of the pointed to type so we can't overflow by writing + // double precision to a single precision allocation, etc + LLVM_ATTRIBUTE_UNUSED auto getBitWidth = [this](mlir::Type ty) { + // 15.6.2.6.3: differering result types should be integer, real, + // complex or logical + if (auto cmplx = mlir::dyn_cast_or_null(ty)) { + fir::KindTy kind = cmplx.getFKind(); + return 2 * builder->getKindMap().getRealBitsize(kind); + } + if (auto logical = mlir::dyn_cast_or_null(ty)) { + fir::KindTy kind = logical.getFKind(); + return builder->getKindMap().getLogicalBitsize(kind); + } + return ty.getIntOrFloatBitWidth(); + }; + assert(getBitWidth(fir::unwrapRefType(resTy)) >= getBitWidth(symTy)); + + // convert the storage to the symbol type so that the hlfir.declare + // gets the correct type for this symbol + preAlloc = builder->create(getCurrentLocation(), + wrappedSymTy, preAlloc); + } + Fortran::lower::mapSymbolAttributes(*this, altResult, localSymbols, - stmtCtx, primaryFuncResultStorage); + stmtCtx, preAlloc); } } diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 604291cdbec6d..4f4505c8b0664 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -1497,6 +1497,18 @@ genHLFIRIntrinsicRefCore(PreparedActualArguments &loweredActuals, return {hlfir::EntityWithAttributes{dotProductOp.getResult()}}; } + if (intrinsicName == "count") { + llvm::SmallVector operands = getOperandVector(loweredActuals); + mlir::Value array = operands[0]; + mlir::Value dim = operands[1]; + if (dim) + dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim}); + mlir::Value kind = operands[2]; + mlir::Type resultTy = computeResultType(array, *callContext.resultType); + hlfir::CountOp countOp = + builder.create(loc, resultTy, array, dim, kind); + return {hlfir::EntityWithAttributes{countOp.getResult()}}; + } // TODO add hlfir operations for other transformational intrinsics here diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index e796f2c385d95..7f899a2937987 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -928,9 +928,12 @@ mlir::OpFoldResult fir::ConvertOp::fold(FoldAdaptor adaptor) { return {}; } +bool fir::ConvertOp::isInteger(mlir::Type ty) { + return ty.isa(); +} + bool fir::ConvertOp::isIntegerCompatible(mlir::Type ty) { - return ty.isa(); + return isInteger(ty) || mlir::isa(ty); } bool fir::ConvertOp::isFloatCompatible(mlir::Type ty) { @@ -1001,8 +1004,8 @@ bool fir::ConvertOp::canBeConverted(mlir::Type inType, mlir::Type outType) { return true; return (isPointerCompatible(inType) && isPointerCompatible(outType)) || (isIntegerCompatible(inType) && isIntegerCompatible(outType)) || - (isIntegerCompatible(inType) && isFloatCompatible(outType)) || - (isFloatCompatible(inType) && isIntegerCompatible(outType)) || + (isInteger(inType) && isFloatCompatible(outType)) || + (isFloatCompatible(inType) && isInteger(outType)) || (isFloatCompatible(inType) && isFloatCompatible(outType)) || (isIntegerCompatible(inType) && isPointerCompatible(outType)) || (isPointerCompatible(inType) && isIntegerCompatible(outType)) || diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index 7a00bfae75ed1..21a44c07953b6 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -509,6 +509,43 @@ mlir::LogicalResult hlfir::AnyOp::verify() { return verifyLogicalReductionOp(this); } +//===----------------------------------------------------------------------===// +// CountOp +//===----------------------------------------------------------------------===// + +mlir::LogicalResult hlfir::CountOp::verify() { + mlir::Operation *op = getOperation(); + + auto results = op->getResultTypes(); + assert(results.size() == 1); + mlir::Value mask = getMask(); + mlir::Value dim = getDim(); + + fir::SequenceType maskTy = + hlfir::getFortranElementOrSequenceType(mask.getType()) + .cast(); + llvm::ArrayRef maskShape = maskTy.getShape(); + + mlir::Type resultType = results[0]; + if (auto resultExpr = mlir::dyn_cast_or_null(resultType)) { + if (maskShape.size() > 1 && dim != nullptr) { + if (!resultExpr.isArray()) + return emitOpError("result must be an array"); + + llvm::ArrayRef resultShape = resultExpr.getShape(); + // Result has rank n-1 + if (resultShape.size() != (maskShape.size() - 1)) + return emitOpError("result rank must be one less than MASK"); + } else { + return emitOpError("result must be of numerical scalar type"); + } + } else if (!hlfir::isFortranScalarNumericalType(resultType)) { + return emitOpError("result must be of numerical scalar type"); + } + + return mlir::success(); +} + //===----------------------------------------------------------------------===// // ConcatOp //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp index b88b30e235a15..d3c604e249a9c 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp @@ -262,6 +262,39 @@ using AnyOpConversion = HlfirReductionIntrinsicConversion; using AllOpConversion = HlfirReductionIntrinsicConversion; +struct CountOpConversion : public HlfirIntrinsicConversion { + using HlfirIntrinsicConversion::HlfirIntrinsicConversion; + + mlir::LogicalResult + matchAndRewrite(hlfir::CountOp count, + mlir::PatternRewriter &rewriter) const override { + fir::KindMapping kindMapping{rewriter.getContext()}; + fir::FirOpBuilder builder{rewriter, kindMapping}; + const mlir::Location &loc = count->getLoc(); + + mlir::Type i32 = builder.getI32Type(); + mlir::Type logicalType = fir::LogicalType::get( + builder.getContext(), builder.getKindMap().defaultLogicalKind()); + + llvm::SmallVector inArgs; + inArgs.push_back({count.getMask(), logicalType}); + inArgs.push_back({count.getDim(), i32}); + inArgs.push_back({count.getKind(), i32}); + + auto *argLowering = fir::getIntrinsicArgumentLowering("count"); + llvm::SmallVector args = + lowerArguments(count, inArgs, rewriter, argLowering); + + mlir::Type scalarResultType = hlfir::getFortranElementType(count.getType()); + + auto [resultExv, mustBeFreed] = + fir::genIntrinsicCall(builder, loc, "count", scalarResultType, args); + + processReturnValue(count, resultExv, mustBeFreed, builder, rewriter); + return mlir::success(); + } +}; + struct MatmulOpConversion : public HlfirIntrinsicConversion { using HlfirIntrinsicConversion::HlfirIntrinsicConversion; @@ -405,14 +438,14 @@ class LowerHLFIRIntrinsics patterns.insert(context); + CountOpConversion, DotProductOpConversion>(context); mlir::ConversionTarget target(*context); target.addLegalDialect(); target.addIllegalOp(); + hlfir::AllOp, hlfir::DotProductOp, hlfir::CountOp>(); target.markUnknownOpDynamicallyLegal( [](mlir::Operation *) { return true; }); if (mlir::failed( diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index c01bcc809d341..c3bfb6922deda 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -946,3 +946,19 @@ func.func @invalid_selector(%arg : !fir.box>) -> i32 { %zero = arith.constant 0 : i32 return %zero : i32 } + +// ----- + +func.func @logical_to_fp(%arg0: !fir.logical<4>) -> f32 { + // expected-error@+1{{'fir.convert' op invalid type conversion}} + %0 = fir.convert %arg0 : (!fir.logical<4>) -> f32 + return %0 : f32 +} + +// ----- + +func.func @fp_to_logical(%arg0: f32) -> !fir.logical<4> { + // expected-error@+1{{'fir.convert' op invalid type conversion}} + %0 = fir.convert %arg0 : (f32) -> !fir.logical<4> + return %0 : !fir.logical<4> +} diff --git a/flang/test/HLFIR/count-lowering.fir b/flang/test/HLFIR/count-lowering.fir new file mode 100644 index 0000000000000..0d9cc34a316eb --- /dev/null +++ b/flang/test/HLFIR/count-lowering.fir @@ -0,0 +1,164 @@ +// Test hlfir.count operation lowering to fir runtime call +// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s + +func.func @_QPcount1(%arg0: !fir.box>> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "s"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFcount1Ea"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg1 {uniq_name = "_QFcount1Es"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2 = hlfir.count %0#0 {fastmath = #arith.fastmath} : (!fir.box>>) -> i32 + hlfir.assign %2 to %1#0 : i32, !fir.ref + return +} +// CHECK-LABEL: func.func @_QPcount1( +// CHECK: %[[ARG0:.*]]: !fir.box>> +// CHECK: %[[ARG1:.*]]: !fir.ref +// CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ARG0]] +// CHECK-DAG: %[[RES:.*]]:2 = hlfir.declare %[[ARG1]] +// CHECK-DAG: %[[MASK_ARG:.*]] = fir.convert %[[MASK]]#1 : (!fir.box>>) -> !fir.box +// CHECK: %[[RET_ARG:.*]] = fir.call @_FortranACount(%[[MASK_ARG]], %[[LOC_STR:.*]], %[[LOC_N:.*]], %[[C1:.*]]) : (!fir.box, !fir.ref, i32, i32) -> i64 +// CHECK-NEXT: %[[RET:.*]] = fir.convert %[[RET_ARG]] : (i64) -> i32 +// CHECK-NEXT: hlfir.assign %[[RET]] to %[[RES]]#0 : i32, !fir.ref +// CHECK-NEXT: return +// CHECK-NEXT: } + +func.func @_QPcount2(%arg0: !fir.box>> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "s"}, %arg2: !fir.ref {fir.bindc_name = "d"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFcount2Ea"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg2 {uniq_name = "_QFcount2Ed"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg1 {uniq_name = "_QFcount2Es"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %3 = fir.load %1#0 : !fir.ref + %4 = hlfir.count %0#0 dim %3 {fastmath = #arith.fastmath} : (!fir.box>>, i32) -> !hlfir.expr + hlfir.assign %4 to %2#0 : !hlfir.expr, !fir.box> + hlfir.destroy %4 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPcount2( +// CHECK: %[[ARG0:.*]]: !fir.box>> +// CHECK: %[[ARG1:.*]]: !fir.box +// CHECK: %[[ARG2:.*]]: !fir.ref +// CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ARG0]] +// CHECK-DAG: %[[DIM_VAR:.*]]:2 = hlfir.declare %[[ARG2]] +// CHECK-DAG: %[[RES:.*]]:2 = hlfir.declare %[[ARG1]] + +// CHECK-DAG: %[[RET_BOX:.*]] = fir.alloca !fir.box>> +// CHECK-DAG: %[[RET_ADDR:.*]] = fir.zero_bits !fir.heap> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[RET_SHAPE:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> +// CHECK-DAG: %[[RET_EMBOX:.*]] = fir.embox %[[RET_ADDR]](%[[RET_SHAPE]]) +// CHECK-DAG: fir.store %[[RET_EMBOX]] to %[[RET_BOX]] + +// CHECK-DAG: %[[DIM:.*]] = fir.load %[[DIM_VAR]]#0 : !fir.ref +// CHECK-DAG: %[[RET_ARG:.*]] = fir.convert %[[RET_BOX]] +// CHECK-DAG: %[[MASK_ARG:.*]] = fir.convert %[[MASK]]#1 + +// CHECK: %[[NONE:.*]] = fir.call @_FortranACountDim(%[[RET_ARG]], %[[MASK_ARG]], %[[DIM]], %[[LOC_STR:.*]], %[[LOC_N:.*]]) : (!fir.ref>, !fir.box, i32, i32, !fir.ref, i32) -> none +// CHECK: %[[RET:.*]] = fir.load %[[RET_BOX]] +// CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[RET]] +// CHECK-NEXT: %[[ADDR:.*]] = fir.box_addr %[[RET]] +// CHECK-NEXT: %[[SHIFT:.*]] = fir.shape_shift %[[BOX_DIMS]]#0, %[[BOX_DIMS]]#1 +// CHECK-NEXT: %[[TMP:.*]]:2 = hlfir.declare %[[ADDR]](%[[SHIFT]]) {uniq_name = ".tmp.intrinsic_result"} +// CHECK: %[[TRUE:.*]] = arith.constant true +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %[[TMP]]#0 move %[[TRUE]] : (!fir.box>, i1) -> !hlfir.expr +// CHECK: hlfir.assign %[[EXPR]] to %[[RES]]#0 +// CHECK: hlfir.destroy %[[EXPR]] +// CHECK-NEXT: return +// CHECK-NEXT: } + +func.func @_QPcount3(%arg0: !fir.ref> {fir.bindc_name = "s"}) { + %0 = fir.address_of(@_QFcount3Ea) : !fir.ref>> + %c2 = arith.constant 2 : index + %c2_0 = arith.constant 2 : index + %1 = fir.shape %c2, %c2_0 : (index, index) -> !fir.shape<2> + %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFcount3Ea"} : (!fir.ref>>, !fir.shape<2>) -> (!fir.ref>>, !fir.ref>>) + %c2_1 = arith.constant 2 : index + %3 = fir.shape %c2_1 : (index) -> !fir.shape<1> + %4:2 = hlfir.declare %arg0(%3) {uniq_name = "_QFcount3Es"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %c1_i32 = arith.constant 1 : i32 + %5 = hlfir.count %2#0 dim %c1_i32 {fastmath = #arith.fastmath} : (!fir.ref>>, i32) -> !hlfir.expr<2xi32> + hlfir.assign %5 to %4#0 : !hlfir.expr<2xi32>, !fir.ref> + hlfir.destroy %5 : !hlfir.expr<2xi32> + return +} +// CHECK-LABEL: func.func @_QPcount3( +// CHECK: %[[ARG0:.*]]: !fir.ref> +// CHECK-DAG: %[[RET_BOX:.*]] = fir.alloca !fir.box>> +// CHECK-DAG: %[[RET_ADDR:.*]] = fir.zero_bits !fir.heap> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[RET_SHAPE:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> +// CHECK-DAG: %[[RET_EMBOX:.*]] = fir.embox %[[RET_ADDR]](%[[RET_SHAPE]]) +// CHECK-DAG: fir.store %[[RET_EMBOX]] to %[[RET_BOX]] +// CHECK-DAG: %[[RES:.*]]:2 = hlfir.declare %[[ARG0]](%[[RES_SHAPE:.*]]) + +// CHECK-DAG: %[[MASK_ADDR:.*]] = fir.address_of +// CHECK-DAG: %[[MASK_VAR:.*]]:2 = hlfir.declare %[[MASK_ADDR]](%[[MASK_SHAPE:.*]]) +// CHECK-DAG: %[[MASK_BOX:.*]] = fir.embox %[[MASK_VAR]]#1(%[[MASK_SHAPE:.*]]) + +// CHECK-DAG: %[[DIM:.*]] = arith.constant 1 : i32 + +// CHECK-DAG: %[[RET_ARG:.*]] = fir.convert %[[RET_BOX]] +// CHECK-DAG: %[[MASK_ARG:.*]] = fir.convert %[[MASK_BOX]] : (!fir.box>>) -> !fir.box +// CHECK: %[[NONE:.*]] = fir.call @_FortranACountDim(%[[RET_ARG]], %[[MASK_ARG]], %[[DIM]], %[[LOC_STR:.*]], %[[LOC_N:.*]]) +// CHECK: %[[RET:.*]] = fir.load %[[RET_BOX]] +// CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[RET]] +// CHECK-NEXT: %[[ADDR:.*]] = fir.box_addr %[[RET]] +// CHECK-NEXT: %[[SHIFT:.*]] = fir.shape_shift %[[BOX_DIMS]]#0, %[[BOX_DIMS]]#1 +// CHECK-NEXT: %[[TMP:.*]]:2 = hlfir.declare %[[ADDR]](%[[SHIFT]]) {uniq_name = ".tmp.intrinsic_result"} +// CHECK: %[[TRUE:.*]] = arith.constant true +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %[[TMP]]#0 move %[[TRUE]] : (!fir.box>, i1) -> !hlfir.expr +// CHECK: hlfir.assign %[[EXPR]] to %[[RES]] +// CHECK: hlfir.destroy %[[EXPR]] +// CHECK-NEXT: return +// CHECK-NEXT: } + +func.func @_QPcount4(%arg0: !fir.box>> {fir.bindc_name = "a"}, %arg1: !fir.box> {fir.bindc_name = "s"}, %arg2: !fir.ref {fir.bindc_name = "d"}) { + %0:2 = hlfir.declare %arg0 {uniq_name = "_QFcount4Ea"} : (!fir.box>>) -> (!fir.box>>, !fir.box>>) + %1:2 = hlfir.declare %arg2 {uniq_name = "_QFcount4Ed"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg1 {uniq_name = "_QFcount4Es"} : (!fir.box>) -> (!fir.box>, !fir.box>) + %c8_i32 = arith.constant 8 : i32 + %3 = fir.load %1#0 : !fir.ref + %4 = hlfir.count %0#0 dim %3 kind %c8_i32 {fastmath = #arith.fastmath} : (!fir.box>>, i32, i32) -> !hlfir.expr + %5 = hlfir.shape_of %4 : (!hlfir.expr) -> !fir.shape<1> + %6 = hlfir.elemental %5 : (!fir.shape<1>) -> !hlfir.expr { + ^bb0(%arg3: index): + %7 = hlfir.apply %4, %arg3 : (!hlfir.expr, index) -> i64 + %8 = fir.convert %7 : (i64) -> i32 + hlfir.yield_element %8 : i32 + } + hlfir.assign %6 to %2#0 : !hlfir.expr, !fir.box> + hlfir.destroy %6 : !hlfir.expr + hlfir.destroy %4 : !hlfir.expr + return +} +// CHECK-LABEL: func.func @_QPcount4( +// CHECK: %[[ARG0:.*]]: !fir.box>> +// CHECK: %[[ARG1:.*]]: !fir.box +// CHECK: %[[ARG2:.*]]: !fir.ref +// CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ARG0]] +// CHECK-DAG: %[[DIM_VAR:.*]]:2 = hlfir.declare %[[ARG2]] +// CHECK-DAG: %[[RES:.*]]:2 = hlfir.declare %[[ARG1]] + +// CHECK-DAG: %[[RET_BOX:.*]] = fir.alloca !fir.box>> +// CHECK-DAG: %[[RET_ADDR:.*]] = fir.zero_bits !fir.heap> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[RET_SHAPE:.*]] = fir.shape %[[C0]] : (index) -> !fir.shape<1> +// CHECK-DAG: %[[RET_EMBOX:.*]] = fir.embox %[[RET_ADDR]](%[[RET_SHAPE]]) +// CHECK-DAG: fir.store %[[RET_EMBOX]] to %[[RET_BOX]] + +// CHECK-DAG: %[[DIM:.*]] = fir.load %[[DIM_VAR]]#0 : !fir.ref +// CHECK-DAG: %[[KIND:.*]] = arith.constant 8 : i32 +// CHECK-DAG: %[[RET_ARG:.*]] = fir.convert %[[RET_BOX]] +// CHECK-DAG: %[[MASK_ARG:.*]] = fir.convert %[[MASK]]#1 + +// CHECK: %[[NONE:.*]] = fir.call @_FortranACountDim(%[[RET_ARG]], %[[MASK_ARG]], %[[DIM]], %[[KIND]], %[[LOC_STR:.*]], %[[LOC_N:.*]]) : (!fir.ref>, !fir.box, i32, i32, !fir.ref, i32) -> none +// CHECK: %[[RET:.*]] = fir.load %[[RET_BOX]] +// CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[RET]] +// CHECK-NEXT: %[[ADDR:.*]] = fir.box_addr %[[RET]] +// CHECK-NEXT: %[[SHIFT:.*]] = fir.shape_shift %[[BOX_DIMS]]#0, %[[BOX_DIMS]]#1 +// CHECK-NEXT: %[[TMP:.*]]:2 = hlfir.declare %[[ADDR]](%[[SHIFT]]) {uniq_name = ".tmp.intrinsic_result"} +// CHECK: %[[TRUE:.*]] = arith.constant true +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %[[TMP]]#0 move %[[TRUE]] : (!fir.box>, i1) -> !hlfir.expr +// CHECK-NEXT: %[[OUT_SHAPE:.*]] = hlfir.shape_of %[[EXPR]] +// CHECK-NEXT: %[[OUT:.*]] = hlfir.elemental %[[OUT_SHAPE]] : (!fir.shape<1>) -> !hlfir.expr +// CHECK-DAG: hlfir.assign %[[OUT]] to %[[RES]]#0 +// CHECK-NEXT: hlfir.destroy %[[OUT]] : !hlfir.expr +// CHECK-NEXT: hlfir.destroy %[[EXPR]] : !hlfir.expr +// CHECK-NEXT: return +// CHECK-NEXT: } diff --git a/flang/test/HLFIR/count.fir b/flang/test/HLFIR/count.fir new file mode 100644 index 0000000000000..c25f9b94124af --- /dev/null +++ b/flang/test/HLFIR/count.fir @@ -0,0 +1,83 @@ +// Test hlfir.count operation parse, verify (no errors), and unparse + +// RUN: fir-opt %s | fir-opt | FileCheck %s + +// mask is an expression of known shape +func.func @count0(%arg0: !hlfir.expr<2x!fir.logical<4>>) { + %count = hlfir.count %arg0 : (!hlfir.expr<2x!fir.logical<4>>) -> i32 + return +} +// CHECK: func.func @count0(%[[ARRAY:.*]]: !hlfir.expr<2x!fir.logical<4>>) { +// CHECK-NEXT: %[[COUNT:.*]] = hlfir.count %[[ARRAY]] : (!hlfir.expr<2x!fir.logical<4>>) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask is an expression of assumed shape +func.func @count1(%arg0: !hlfir.expr>) { + %count = hlfir.count %arg0 : (!hlfir.expr>) -> i32 + return +} +// CHECK: func.func @count1(%[[ARRAY:.*]]: !hlfir.expr>) { +// CHECK-NEXT: %[[COUNT:.*]] = hlfir.count %[[ARRAY]] : (!hlfir.expr>) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask is a boxed array +func.func @count2(%arg0: !fir.box>>) { + %count = hlfir.count %arg0 : (!fir.box>>) -> i32 + return +} +// CHECK: func.func @count2(%[[ARRAY:.*]]: !fir.box>>) { +// CHECK-NEXT: %[[COUNT:.*]] = hlfir.count %[[ARRAY]] : (!fir.box>>) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask is an assumed shape boxed array +func.func @count3(%arg0: !fir.box>>) { + %count = hlfir.count %arg0 : (!fir.box>>) -> i32 + return +} +// CHECK: func.func @count3(%[[ARRAY:.*]]: !fir.box>>) { +// CHECK-NEXT: %[[COUNT:.*]] = hlfir.count %[[ARRAY]] : (!fir.box>>) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask is a 2-dimensional array +func.func @count4(%arg0: !fir.box>>){ + %count = hlfir.count %arg0 : (!fir.box>>) -> i32 + return +} +// CHECK: func.func @count4(%[[ARRAY:.*]]: !fir.box>>) { +// CHECK-NEXT: %[[COUNT:.*]] = hlfir.count %[[ARRAY]] : (!fir.box>>) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask and dim argument +func.func @count5(%arg0: !fir.box>>, %arg1: i32) { + %count = hlfir.count %arg0 dim %arg1 : (!fir.box>>, i32) -> i32 + return +} +// CHECK: func.func @count5(%[[ARRAY:.*]]: !fir.box>>, %[[DIM:.*]]: i32) { +// CHECK-NEXT: %[[COUNT:.*]] = hlfir.count %[[ARRAY]] dim %[[DIM]] : (!fir.box>>, i32) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask is a 2 dimensional array with dim +func.func @count6(%arg0: !fir.box>>, %arg1: i32) { + %count = hlfir.count %arg0 dim %arg1 : (!fir.box>>, i32) -> i32 + return +} +// CHECK: func.func @count6(%[[ARRAY:.*]]: !fir.box>>, %[[DIM:.*]]: i32) { +// CHECK-NEXT: %[[ANY:.*]] = hlfir.count %[[ARRAY]] dim %[[DIM]] : (!fir.box>>, i32) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } + +// mask is of a different logical type +func.func @count7(%arg0: !fir.box>>, %arg1: i32) { + %count = hlfir.count %arg0 dim %arg1 : (!fir.box>>, i32) -> i32 + return +} +// CHECK: func.func @count7(%[[ARRAY:.*]]: !fir.box>>, %[[DIM:.*]]: i32) { +// CHECK-NEXT: %[[ANY:.*]] = hlfir.count %[[ARRAY]] dim %[[DIM]] : (!fir.box>>, i32) -> i32 +// CHECK-NEXT: return +// CHECK-NEXT: } \ No newline at end of file diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir index 01bccdf80428b..6db1b79e0818f 100644 --- a/flang/test/HLFIR/invalid.fir +++ b/flang/test/HLFIR/invalid.fir @@ -368,6 +368,30 @@ func.func @bad_all6(%arg0: !hlfir.expr>) { %0 = hlfir.all %arg0 : (!hlfir.expr>) -> !hlfir.expr> } +// ----- +func.func @bad_count1(%arg0: !hlfir.expr>, %arg1: i32) { + // expected-error@+1 {{'hlfir.count' op result must be an array}} + %0 = hlfir.count %arg0 dim %arg1 : (!hlfir.expr>, i32) -> !hlfir.expr +} + +// ----- +func.func @bad_count2(%arg0: !hlfir.expr>, %arg1: i32){ + // expected-error@+1 {{'hlfir.count' op result rank must be one less than MASK}} + %0 = hlfir.count %arg0 dim %arg1 : (!hlfir.expr>, i32) -> !hlfir.expr> +} + +// ----- +func.func @bad_count3(%arg0: !hlfir.expr>, %arg1: i32) { + // expected-error@+1 {{'hlfir.count' op result must be of numerical scalar type}} + %0 = hlfir.count %arg0 dim %arg1 : (!hlfir.expr>, i32) -> !hlfir.expr +} + +// ----- +func.func @bad_count4(%arg0: !hlfir.expr>, %arg1: i32) { + // expected-error@+1 {{'hlfir.count' op result must be of numerical scalar type}} + %0 = hlfir.count %arg0 dim %arg1 : (!hlfir.expr>, i32) -> !fir.logical<4> +} + // ----- func.func @bad_product1(%arg0: !hlfir.expr, %arg1: i32, %arg2: !fir.box>) { // expected-error@+1 {{'hlfir.product' op result must have the same element type as ARRAY argument}} diff --git a/flang/test/Lower/HLFIR/count.f90 b/flang/test/Lower/HLFIR/count.f90 new file mode 100644 index 0000000000000..25c74841514e9 --- /dev/null +++ b/flang/test/Lower/HLFIR/count.f90 @@ -0,0 +1,82 @@ +! Test lowering of COUNT intrinsic to HLFIR +! RUN: bbc -emit-hlfir -o - %s 2>&1 | FileCheck %s + +! simple 1 argument COUNT +subroutine count1(a, s) + logical :: a(:) + integer :: s + s = COUNT(a) +end subroutine +! CHECK-LABEL: func.func @_QPcount1( +! CHECK: %[[ARG0:.*]]: !fir.box>> +! CHECK: %[[ARG1:.*]]: !fir.ref +! CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ARG0]] +! CHECK-DAG: %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]] +! CHECK-NEXT: %[[EXPR:.*]] = hlfir.count %[[MASK]]#0 : (!fir.box>>) -> i32 +! CHECK-NEXT: hlfir.assign %[[EXPR]] to %[[OUT]]#0 : i32, !fir.ref +! CHECK-NEXT: return +! CHECK-NEXT: } + +! count with by-ref DIM argument +subroutine count2(a, s, d) + logical :: a(:,:) + integer :: s(:), d + s = COUNT(a, d) +end subroutine +! CHECK-LABEL: func.func @_QPcount2( +! CHECK: %[[ARG0:.*]]: !fir.box>> +! CHECK: %[[ARG1:.*]]: !fir.box> +! CHECK: %[[ARG2:.*]]: !fir.ref +! CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ARG0]] +! CHECK-DAG: %[[DIM_REF:.*]]:2 = hlfir.declare %[[ARG2]] +! CHECK-DAG: %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]] +! CHECK-DAG: %[[DIM:.*]] = fir.load %[[DIM_REF]]#0 : !fir.ref +! CHECK-DAG: %[[EXPR:.*]] = hlfir.count %[[MASK]]#0 dim %[[DIM]] : (!fir.box>>, i32) -> !hlfir.expr +! CHECK-NEXT: hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr, !fir.box> +! CHECK-NEXT: hlfir.destroy %[[EXPR]] : !hlfir.expr +! CHECK-NEXT: return +! CHECK-NEXT: } + +! count with DIM argument by-val, mask isn't boxed +subroutine count3(s) + integer :: s(2) + logical :: a(2,2) = reshape((/.true.,.false.,.true.,.false./), shape(a)) + s = COUNT(a, 1) +end subroutine +! CHECK-LABEL: func.func @_QPcount3( +! CHECK: %[[ARG0:.*]]: !fir.ref> +! CHECK-DAG: %[[ADDR:.*]] = fir.address_of{{.*}} : !fir.ref>> +! CHECK-DAG: %[[MASK_SHAPE:.*]] = fir.shape {{.*}} -> !fir.shape<2> +! CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ADDR]](%[[MASK_SHAPE]]) +! CHECK-DAG: %[[OUT_SHAPE:.*]] = fir.shape {{.*}} -> !fir.shape<1> +! CHECK-DAG: %[[OUT:.*]]:2 = hlfir.declare %[[ARG0]](%[[OUT_SHAPE]]) +! CHECK-DAG: %[[C1:.*]] = arith.constant 1 : i32 +! CHECK-DAG: %[[EXPR:.*]] = hlfir.count %[[MASK]]#0 dim %[[C1]] : (!fir.ref>>, i32) -> !hlfir.expr<2xi32> +! CHECK-DAG: hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<2xi32>, !fir.ref> +! CHECK-NEXT: hlfir.destroy %[[EXPR]] : !hlfir.expr<2xi32> +! CHECK-NEXT: return +! CHECK-NEXT: } + +! count with dim and kind arguments +subroutine count4(a, s, d) + logical :: a(:,:) + integer :: s(:), d + s = COUNT(a, d, 8) +end subroutine +! CHECK-LABEL: func.func @_QPcount4( +! CHECK: %[[ARG0:.*]]: !fir.box>> +! CHECK: %[[ARG1:.*]]: !fir.box> +! CHECK: %[[ARG2:.*]]: !fir.ref +! CHECK-DAG: %[[MASK:.*]]:2 = hlfir.declare %[[ARG0]] +! CHECK-DAG: %[[DIM_REF:.*]]:2 = hlfir.declare %[[ARG2]] +! CHECK-DAG: %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]] +! CHECK-DAG: %[[C8:.*]] = arith.constant 8 : i32 +! CHECK-DAG: %[[DIM:.*]] = fir.load %[[DIM_REF]]#0 : !fir.ref +! CHECK-DAG: %[[EXPR:.*]] = hlfir.count %[[MASK]]#0 dim %[[DIM]] kind %[[C8]] : (!fir.box>>, i32, i32) -> !hlfir.expr +! CHECK-DAG: %[[RES_SHAPE:.*]] = hlfir.shape_of %[[EXPR]] +! CHECK-DAG: %[[RES:.*]] = hlfir.elemental %[[RES_SHAPE]] : (!fir.shape<1>) -> !hlfir.expr +! CHECK-DAG: hlfir.assign %[[RES]] to %[[OUT]]#0 +! CHECK-NEXT: hlfir.destroy %[[RES]] : !hlfir.expr +! CHECK-NEXT: hlfir.destroy %[[EXPR]] : !hlfir.expr +! CHECK-NEXT: return +! CHECK-NEXT: } diff --git a/flang/test/Lower/HLFIR/entry_return.f90 b/flang/test/Lower/HLFIR/entry_return.f90 new file mode 100644 index 0000000000000..d2fb80c8b97b7 --- /dev/null +++ b/flang/test/Lower/HLFIR/entry_return.f90 @@ -0,0 +1,86 @@ +! RUN: bbc -emit-hlfir -o - %s | FileCheck %s +! test multiple return values with different types coming from ENTRY statements + +complex function f1() + logical e1 + entry e1() + e1 = .false. +end function +! CHECK-LABEL: func.func @_QPf1() -> !fir.complex<4> { +! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.complex<4> {bindc_name = "f1", uniq_name = "_QFf1Ef1"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFf1Ef1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]]#1 : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFf1Ee1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_4:.*]] = arith.constant false +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_5]] to %[[VAL_3]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]]#1 : !fir.ref> +! CHECK: return %[[VAL_6]] : !fir.complex<4> +! CHECK: } + +! // CHECK-LABEL: func.func @_QPe1() -> !fir.logical<4> { +! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.complex<4> {bindc_name = "f1", uniq_name = "_QFf1Ef1"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFf1Ef1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]]#1 : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFf1Ee1"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_4:.*]] = arith.constant false +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i1) -> !fir.logical<4> +! CHECK: hlfir.assign %[[VAL_5]] to %[[VAL_3]]#0 : !fir.logical<4>, !fir.ref> +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref> +! CHECK: return %[[VAL_6]] : !fir.logical<4> +! CHECK: } + +logical function f2() + complex e2 + entry e2() + e2 = complex(1.0, 2.0) +end function +! CHECK-LABEL: func.func @_QPf2() -> !fir.logical<4> { +! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.complex<4> {bindc_name = "e2", uniq_name = "_QFf2Ee2"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFf2Ee2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]]#1 : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFf2Ef2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: %[[VAL_5:.*]] = arith.constant 2.000000e+00 : f32 +! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_4]] {uniq_name = "adapt.valuebyref"} : (f32) -> (!fir.ref, !fir.ref, i1) +! CHECK: %[[VAL_7:.*]]:3 = hlfir.associate %[[VAL_5]] {uniq_name = "adapt.valuebyref"} : (f32) -> (!fir.ref, !fir.ref, i1) +! CHECK: %[[VAL_8:.*]] = fir.call @_QPcomplex(%[[VAL_6]]#1, %[[VAL_7]]#1) fastmath : (!fir.ref, !fir.ref) -> f32 +! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref, i1 +! CHECK: hlfir.end_associate %[[VAL_7]]#1, %[[VAL_7]]#2 : !fir.ref, i1 +! CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: %[[VAL_10:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_8]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_9]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_1]]#0 : !fir.complex<4>, !fir.ref> +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref> +! CHECK: return %[[VAL_13]] : !fir.logical<4> +! CHECK: } + +! CHECK-LABEL: func.func @_QPe2() -> !fir.complex<4> { +! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.complex<4> {bindc_name = "e2", uniq_name = "_QFf2Ee2"} +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFf2Ee2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]]#1 : (!fir.ref>) -> !fir.ref> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFf2Ef2"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: %[[VAL_4:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: %[[VAL_5:.*]] = arith.constant 2.000000e+00 : f32 +! CHECK: %[[VAL_6:.*]]:3 = hlfir.associate %[[VAL_4]] {uniq_name = "adapt.valuebyref"} : (f32) -> (!fir.ref, !fir.ref, i1) +! CHECK: %[[VAL_7:.*]]:3 = hlfir.associate %[[VAL_5]] {uniq_name = "adapt.valuebyref"} : (f32) -> (!fir.ref, !fir.ref, i1) +! CHECK: %[[VAL_8:.*]] = fir.call @_QPcomplex(%[[VAL_6]]#1, %[[VAL_7]]#1) fastmath : (!fir.ref, !fir.ref) -> f32 +! CHECK: hlfir.end_associate %[[VAL_6]]#1, %[[VAL_6]]#2 : !fir.ref, i1 +! CHECK: hlfir.end_associate %[[VAL_7]]#1, %[[VAL_7]]#2 : !fir.ref, i1 +! CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: %[[VAL_10:.*]] = fir.undefined !fir.complex<4> +! CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_8]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_9]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> +! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_1]]#0 : !fir.complex<4>, !fir.ref> +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_1]]#1 : !fir.ref> +! CHECK: return %[[VAL_13]] : !fir.complex<4> +! CHECK: } diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 326123d8b7482..65bfb585da7b1 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -335,7 +335,6 @@ int File::flush_unlocked() { return buf_result.error; } pos = 0; - return platform_flush(this); } // TODO: Add POSIX behavior for input streams. return 0; diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 46bad008fcf7f..3a4bc89a7b311 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -56,7 +56,6 @@ class File { // file position indicator. using SeekFunc = ErrorOr(File *, long, int); using CloseFunc = int(File *); - using FlushFunc = int(File *); // CleanupFunc is a function which does the equivalent of this: // // void my_file_cleanup(File *f) { @@ -103,7 +102,6 @@ class File { ReadFunc *platform_read; SeekFunc *platform_seek; CloseFunc *platform_close; - FlushFunc *platform_flush; CleanupFunc *platform_cleanup; Mutex mutex; @@ -202,15 +200,13 @@ class File { // is zero. This way, we will not have to employ the semantics of // the set_buffer method and allocate a buffer. constexpr File(WriteFunc *wf, ReadFunc *rf, SeekFunc *sf, CloseFunc *cf, - FlushFunc *ff, CleanupFunc *clf, uint8_t *buffer, - size_t buffer_size, int buffer_mode, bool owned, - ModeFlags modeflags) + CleanupFunc *clf, uint8_t *buffer, size_t buffer_size, + int buffer_mode, bool owned, ModeFlags modeflags) : platform_write(wf), platform_read(rf), platform_seek(sf), - platform_close(cf), platform_flush(ff), platform_cleanup(clf), - mutex(false, false, false), ungetc_buf(0), buf(buffer), - bufsize(buffer_size), bufmode(buffer_mode), own_buf(owned), - mode(modeflags), pos(0), prev_op(FileOp::NONE), read_limit(0), - eof(false), err(false) { + platform_close(cf), platform_cleanup(clf), mutex(false, false, false), + ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode), + own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE), + read_limit(0), eof(false), err(false) { if constexpr (ENABLE_BUFFER) adjust_buf(); } diff --git a/libc/src/__support/File/gpu/file.cpp b/libc/src/__support/File/gpu/file.cpp index f31bcb53292cb..e2a0c1579ffa3 100644 --- a/libc/src/__support/File/gpu/file.cpp +++ b/libc/src/__support/File/gpu/file.cpp @@ -26,8 +26,8 @@ class GPUFile : public File { public: constexpr GPUFile(uintptr_t file, File::ModeFlags modeflags) - : File(&write_func, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - 0, _IONBF, false, modeflags), + : File(&write_func, nullptr, nullptr, nullptr, nullptr, nullptr, 0, + _IONBF, false, modeflags), file(file) {} uintptr_t get_file() const { return file; } diff --git a/libc/src/__support/File/linux/file.cpp b/libc/src/__support/File/linux/file.cpp index 7fb7cb587c86f..704bbd0d6035e 100644 --- a/libc/src/__support/File/linux/file.cpp +++ b/libc/src/__support/File/linux/file.cpp @@ -24,7 +24,6 @@ FileIOResult write_func(File *, const void *, size_t); FileIOResult read_func(File *, void *, size_t); ErrorOr seek_func(File *, long, int); int close_func(File *); -int flush_func(File *); } // anonymous namespace @@ -34,7 +33,7 @@ class LinuxFile : public File { public: constexpr LinuxFile(int file_descriptor, uint8_t *buffer, size_t buffer_size, int buffer_mode, bool owned, File::ModeFlags modeflags) - : File(&write_func, &read_func, &seek_func, &close_func, flush_func, + : File(&write_func, &read_func, &seek_func, &close_func, &cleanup_file, buffer, buffer_size, buffer_mode, owned, modeflags), fd(file_descriptor) {} @@ -91,15 +90,6 @@ int close_func(File *f) { return 0; } -int flush_func(File *f) { - auto *lf = reinterpret_cast(f); - int ret = __llvm_libc::syscall_impl(SYS_fsync, lf->get_fd()); - if (ret < 0) { - return -ret; - } - return 0; -} - } // anonymous namespace ErrorOr openfile(const char *path, const char *mode) { diff --git a/libc/src/__support/macros/properties/CMakeLists.txt b/libc/src/__support/macros/properties/CMakeLists.txt index 8ddd16a1e3669..f6c88c34ebb5a 100644 --- a/libc/src/__support/macros/properties/CMakeLists.txt +++ b/libc/src/__support/macros/properties/CMakeLists.txt @@ -1,17 +1,19 @@ add_header_library( - architectures - HDRS + architectures + HDRS architectures.h ) add_header_library( - compiler - HDRS + compiler + HDRS compiler.h ) add_header_library( - cpu_features - HDRS + cpu_features + HDRS cpu_features.h + DEPENDS + .architectures ) diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h index 086b216e27573..493d9f446d374 100644 --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -12,6 +12,8 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H #define LLVM_LIBC_SRC_SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H +#include "architectures.h" + #if defined(__SSE2__) #define LIBC_TARGET_CPU_HAS_SSE2 #endif @@ -41,4 +43,10 @@ #define LIBC_TARGET_CPU_HAS_FMA #endif +#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ + (defined(LIBC_TARGET_ARCH_IS_X86_64) && \ + defined(LIBC_TARGET_CPU_HAS_SSE4_2)) +#define LIBC_TARGET_CPU_HAS_NEAREST_INT +#endif + #endif // LLVM_LIBC_SRC_SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 3d5273fa90cc1..7772c6d826905 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -561,6 +561,7 @@ add_entrypoint_object( ../exp2f.h DEPENDS .explogxf + libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add @@ -1374,6 +1375,8 @@ add_entrypoint_object( .explogxf libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp index 8b30d51c69dce..67d2667711de9 100644 --- a/libc/src/math/generic/coshf.cpp +++ b/libc/src/math/generic/coshf.cpp @@ -23,13 +23,13 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) { uint32_t x_u = xbits.uintval(); - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_u <= 0x3280'0000U)) { - return 1.0f + x; - } - // When |x| >= 90, or x is inf or nan - if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U)) { + if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U || x_u <= 0x3280'0000U)) { + // |x| <= 2^-26 + if (x_u <= 0x3280'0000U) { + return 1.0f + x; + } + if (xbits.is_inf_or_nan()) return x + FPBits::inf().get_val(); diff --git a/libc/src/math/generic/exp2f.cpp b/libc/src/math/generic/exp2f.cpp index f2c005d5e00a0..b967a4bb45371 100644 --- a/libc/src/math/generic/exp2f.cpp +++ b/libc/src/math/generic/exp2f.cpp @@ -10,11 +10,13 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/common.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" #include @@ -33,13 +35,38 @@ LLVM_LIBC_FUNCTION(float, exp2f, (float x)) { uint32_t x_u = xbits.uintval(); uint32_t x_abs = x_u & 0x7fff'ffffU; - // |x| < 2^-25 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return 1.0f + x; - } + // When |x| >= 128, or x is nan, or |x| <= 2^-5 + if (LIBC_UNLIKELY(x_abs >= 0x4300'0000U || x_abs <= 0x3d00'0000U)) { + // |x| <= 2^-5 + if (x_abs <= 0x3d00'0000) { + // |x| < 2^-25 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + return 1.0f + x; + } - // // When |x| >= 128, or x is nan - if (LIBC_UNLIKELY(x_abs >= 0x4300'0000U)) { + // Check exceptional values. + if (LIBC_UNLIKELY((x_u & EXVAL_MASK) == EXVAL_MASK)) { + if (LIBC_UNLIKELY(x_u == EXVAL1)) { // x = 0x1.853a6ep-9f + return fputil::round_result_slightly_down(0x1.00870ap+0f); + } else if (LIBC_UNLIKELY(x_u == EXVAL2)) { // x = -0x1.e7526ep-6f + return fputil::round_result_slightly_down(0x1.f58d62p-1f); + } + } + + // Minimax polynomial generated by Sollya with: + // > P = fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]); + constexpr double COEFFS[] = { + 0x1.62e42fefa39f3p-1, 0x1.ebfbdff82c57bp-3, 0x1.c6b08d6f2d7aap-5, + 0x1.3b2ab6fc92f5dp-7, 0x1.5d897cfe27125p-10, 0x1.43090e61e6af1p-13}; + double xd = static_cast(x); + double xsq = xd * xd; + double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]); + double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]); + double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); + double p = fputil::polyeval(xsq, c0, c1, c2); + double r = fputil::multiply_add(p, xd, 1.0); + return r; + } // x >= 128 if (!xbits.get_sign()) { @@ -73,17 +100,6 @@ LLVM_LIBC_FUNCTION(float, exp2f, (float x)) { } } - // Check exceptional values. - if (LIBC_UNLIKELY((x_u & EXVAL_MASK) == EXVAL_MASK)) { - if (LIBC_UNLIKELY(x_u == EXVAL1)) { // x = 0x1.853a6ep-9f - if (fputil::fenv_is_round_to_nearest()) - return 0x1.00870ap+0f; - } else if (LIBC_UNLIKELY(x_u == EXVAL2)) { // x = -0x1.e7526ep-6f - if (fputil::fenv_is_round_to_nearest()) - return 0x1.f58d62p-1f; - } - } - // For -150 < x < 128, to compute 2^x, we perform the following range // reduction: find hi, mid, lo such that: // x = hi + mid + lo, in which @@ -101,11 +117,20 @@ LLVM_LIBC_FUNCTION(float, exp2f, (float x)) { // of 2^mid. // kf = (hi + mid) * 2^5 = round(x * 2^5) - float kf = fputil::nearest_integer(x * 32.0f); + float kf; + int k; +#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT + kf = fputil::nearest_integer(x * 32.0f); + k = static_cast(kf); +#else + constexpr float HALF[2] = {0.5f, -0.5f}; + k = static_cast(fputil::multiply_add(x, 32.0f, HALF[x < 0.0f])); + kf = static_cast(k); +#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT + // dx = lo = x - (hi + mid) = x - kf * 2^(-5) double dx = fputil::multiply_add(-0x1.0p-5f, kf, x); - int k = static_cast(kf); // hi = floor(kf * 2^(-4)) // exp_hi = shift hi to the exponent field of double precision. int64_t exp_hi = diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h index cb8efc92f28c7..827762ca48aeb 100644 --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -16,6 +16,7 @@ #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" +#include "src/__support/macros/properties/cpu_features.h" #include @@ -210,13 +211,24 @@ template LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) { template LIBC_INLINE double exp_pm_eval(float x) { double xd = static_cast(x); - // round(x * log2(e) * 2^5) - double kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); - + // kd = round(x * log2(e) * 2^5) // k_p = round(x * log2(e) * 2^5) - int k_p = static_cast(kd); // k_m = round(-x * log2(e) * 2^5) - int k_m = -k_p; + double kd; + int k_p, k_m; + +#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT + kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); + k_p = static_cast(kd); + k_m = -k_p; +#else + constexpr double HALF_WAY[2] = {0.5, -0.5}; + + k_p = static_cast( + fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); + k_m = -k_p; + kd = static_cast(k_p); +#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT // hi = floor(kf * 2^(-5)) // exp_hi = shift hi to the exponent field of double precision. @@ -243,19 +255,19 @@ template LIBC_INLINE double exp_pm_eval(float x) { double dx2 = dx * dx; // c0 = 1 + COEFFS[0] * lo^2 - // P_even = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4 - double p_even = - fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[0], ExpBase::COEFFS[2]); - // P_odd = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4 - double p_odd = - fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[1], ExpBase::COEFFS[3]); + // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 + double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, + ExpBase::COEFFS[2] * 0.5); + // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 + double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, + ExpBase::COEFFS[3] * 0.5); double r; if constexpr (is_sinh) r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); else r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); - return 0.5 * r; + return r; } // x should be positive, normal finite value diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp index 7f4d0d6e3af2e..3ebfe6ba07009 100644 --- a/libc/src/math/generic/sinhf.cpp +++ b/libc/src/math/generic/sinhf.cpp @@ -17,23 +17,43 @@ namespace __llvm_libc { LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); - bool sign = xbits.get_sign(); uint32_t x_abs = xbits.uintval() & FPBits::FloatProp::EXP_MANT_MASK; - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast( - LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x)); - } - // When |x| >= 90, or x is inf or nan - if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U)) { + if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U || x_abs <= 0x3da0'0000U)) { + // |x| <= 0.078125 + if (x_abs <= 0x3da0'0000U) { + // |x| = 0.0005589424981735646724700927734375 + if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) { + if (fputil::fenv_is_round_to_nearest()) + return x; + } + + // |x| <= 2^-26 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + return static_cast( + LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x)); + } + + double xdbl = x; + double x2 = xdbl * xdbl; + // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x); + // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1 + // * (0x1.111110d239f1fp-7 + // + x^0x1p1 * 0x1.a02b5a284013cp-13))) + // Therefore, output of Sollya = x * pe; + double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3, + 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); + } + if (xbits.is_nan()) return x + 1.0f; // sNaN to qNaN + signal if (xbits.is_inf()) return x; + bool sign = xbits.get_sign(); int rounding = fputil::quick_get_round(); if (sign) { if (LIBC_UNLIKELY(rounding == FE_UPWARD || rounding == FE_TOWARDZERO)) @@ -50,26 +70,6 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { return x + FPBits::inf(sign).get_val(); } - // |x| <= 0.078125 - if (LIBC_UNLIKELY(x_abs <= 0x3da0'0000U)) { - // |x| = 0.0005589424981735646724700927734375 - if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) { - if (fputil::fenv_is_round_to_nearest()) - return x; - } - - double xdbl = x; - double x2 = xdbl * xdbl; - // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x); - // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1 - // * (0x1.111110d239f1fp-7 - // + x^0x1p1 * 0x1.a02b5a284013cp-13))) - // Therefore, output of Sollya = x * pe; - double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3, - 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13); - return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); - } - // sinh(x) = (e^x - e^(-x)) / 2. return static_cast(exp_pm_eval(x)); } diff --git a/libc/src/math/generic/tanhf.cpp b/libc/src/math/generic/tanhf.cpp index eb6b50a64b014..21c3ed8c8c333 100644 --- a/libc/src/math/generic/tanhf.cpp +++ b/libc/src/math/generic/tanhf.cpp @@ -8,66 +8,114 @@ #include "src/math/tanhf.h" #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" #include "src/math/generic/explogxf.h" namespace __llvm_libc { +// 2^6 * log2(e) +constexpr double LOG2_E_EXP2_6 = ExpBase::LOG2_B * 2.0; + LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); - bool sign = xbits.get_sign(); - uint32_t x_abs = xbits.uintval() & FPBits::FloatProp::EXP_MANT_MASK; + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & FPBits::FloatProp::EXP_MANT_MASK; - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast( - LIBC_UNLIKELY(x_abs == 0) ? x : (x - 0x1.5555555555555p-2 * x * x * x)); - } + // When |x| >= 15, or x is inf or nan, or |x| <= 0.078125 + if (LIBC_UNLIKELY((x_abs >= 0x4170'0000U) || (x_abs <= 0x3da0'0000U))) { + if (x_abs <= 0x3da0'0000U) { + // |x| <= 0.078125 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + // |x| <= 2^-26 + return (x_abs != 0) + ? static_cast(x - 0x1.5555555555555p-2 * x * x * x) + : x; + } + + const double TAYLOR[] = {-0x1.5555555555555p-2, 0x1.1111111111111p-3, + -0x1.ba1ba1ba1ba1cp-5, 0x1.664f4882c10fap-6, + -0x1.226e355e6c23dp-7}; + double xdbl = x; + double x2 = xdbl * xdbl; + // Taylor polynomial. + double x4 = x2 * x2; + double c0 = x2 * TAYLOR[0]; + double c1 = fputil::multiply_add(x2, TAYLOR[2], TAYLOR[1]); + double c2 = fputil::multiply_add(x2, TAYLOR[4], TAYLOR[3]); + double pe = fputil::polyeval(x4, c0, c1, c2); - // When |x| >= 15, or x is inf or nan - if (LIBC_UNLIKELY(x_abs >= 0x4170'0000U)) { - if (xbits.is_nan()) + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); + } + + // |x| >= 15 + if (LIBC_UNLIKELY(xbits.is_nan())) return x + 1.0f; // sNaN to qNaN + signal - if (xbits.is_inf()) - return sign ? -1.0f : 1.0f; + const double SIGNS[2][2] = {{1.0f, -0x1.0p-25f}, {-1.0f, 0x1.0p-25f}}; - if (sign) { - return -1.0f + opt_barrier(FPBits(FPBits::MIN_NORMAL).get_val()); - } else - return 1.0f - opt_barrier(FPBits(FPBits::MIN_NORMAL).get_val()); - } + bool sign = xbits.get_sign(); + int idx = static_cast(sign); - // |x| <= 0.078125 - if (LIBC_UNLIKELY(x_abs <= 0x3da0'0000U)) { - double xdbl = x; - double x2 = xdbl * xdbl; - // Pure Taylor series. - double pe = fputil::polyeval(x2, 0.0, -0x1.5555555555555p-2, - 0x1.1111111111111p-3, -0x1.ba1ba1ba1ba1cp-5, - 0x1.664f4882c10fap-6, -0x1.226e355e6c23dp-7); - return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); - } + if (LIBC_UNLIKELY(xbits.is_inf())) + return SIGNS[idx][0]; - if (LIBC_UNLIKELY(xbits.bits == 0x4058'e0a3U)) { - if (fputil::fenv_is_round_down()) - return FPBits(0x3f7f'6ad9U).get_val(); + return SIGNS[idx][0] + SIGNS[idx][1]; } - // Range reduction: e^(2x) = 2^(mid + hi) * e^lo - auto ep = exp_b_range_reduc(2.0f * x); // exp(2 * x) - double r = ExpBase::powb_lo(ep.lo); - // tanh(x) = (exp(2x) - 1) / (exp(2x) + 1) -#if defined(LIBC_TARGET_CPU_HAS_FMA) - return static_cast(fputil::multiply_add(ep.mh, r, -1.0) / - fputil::multiply_add(ep.mh, r, 1.0)); + // Range reduction: e^(2x) = 2^(hi + mid) * e^lo + // Let k = round( x * 2^6 * log2(e)), + // So k = (hi + mid) * 2^5 + // Then lo = 2x - (hi + mid) * log(2) = 2x - k * 2^-5 * log(2). + + double xd = static_cast(x); + // k = round( x* 2^6 * log2(e) ) + double k; + // mk = -k + int mk; +#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT + k = fputil::nearest_integer(xd * LOG2_E_EXP2_6); + mk = -static_cast(k); #else - double exp_x = ep.mh * r; - return static_cast((exp_x - 1.0) / (exp_x + 1.0)); -#endif // LIBC_TARGET_CPU_HAS_FMA + constexpr double HALF_WAY[2] = {-0.5, 0.5}; + + mk = static_cast( + fputil::multiply_add(xd, -LOG2_E_EXP2_6, HALF_WAY[xbits.get_sign()])); + k = static_cast(-mk); +#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT + // -hi = floor(-k * 2^(-MID_BITS)) + // exp_mhi = shift -hi to the exponent field of double precision. + int64_t exp_mhi = static_cast(mk >> ExpBase::MID_BITS) + << fputil::FloatProperties::MANTISSA_WIDTH; + // mh = 2^(-hi - mid) + int64_t mh_bits = ExpBase::EXP_2_MID[mk & ExpBase::MID_MASK] + exp_mhi; + double mh = fputil::FPBits(uint64_t(mh_bits)).get_val(); + // dx = lo/2 = x - (hi + mid) * log(2)/2 = x - k * 2^-6 * log(2) + double dx = fputil::multiply_add( + k, ExpBase::M_LOGB_2_LO * 0.5, + fputil::multiply_add(k, ExpBase::M_LOGB_2_HI * 0.5, xd)); + + // > P = fpminimax(expm1(2*x)/x, 4, [|D...|], [-log(2)/128, log(2)/128]); + constexpr double COEFFS[] = {0x1.ffffffffe5bc8p0, 0x1.555555555cd67p0, + 0x1.5555c2a9b48b4p-1, 0x1.11112a0e34bdbp-2}; + + double dx2 = dx * dx; + double c0 = fputil::multiply_add(dx, 2.0, 1.0); + double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); + double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); + double r = fputil::polyeval(dx2, c0, c1, c2); + + // tanh(x) = sinh(x) / cosh(x) + // = (e^x - e^(-x)) / (e^x + e^(-x)) + // = (e^(2x) - 1) / (e^(2x) + 1) + // = (2^(hi + mid) * e^lo - 1) / (2^(hi + mid) * e^lo + 1) + // = (e^lo - 2^(-hi - mid)) / (e^lo + 2^(-hi - mid)) + // = (r - mh) / (r + mh) + return static_cast((r - mh) / (r + mh)); } } // namespace __llvm_libc diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp index da72ed3318105..4c8dbe2e796a7 100644 --- a/libc/src/stdio/fopencookie.cpp +++ b/libc/src/stdio/fopencookie.cpp @@ -26,15 +26,13 @@ class CookieFile : public __llvm_libc::File { static FileIOResult cookie_read(File *f, void *data, size_t size); static ErrorOr cookie_seek(File *f, long offset, int whence); static int cookie_close(File *f); - static int cookie_flush(File *); public: CookieFile(void *c, cookie_io_functions_t cops, uint8_t *buffer, size_t bufsize, File::ModeFlags mode) : File(&cookie_write, &cookie_read, &CookieFile::cookie_seek, - &cookie_close, &cookie_flush, &cleanup_file, buffer, - bufsize, 0 /* default buffering mode */, - true /* File owns buffer */, mode), + &cookie_close, &cleanup_file, buffer, bufsize, + 0 /* default buffering mode */, true /* File owns buffer */, mode), cookie(c), ops(cops) {} }; @@ -74,8 +72,6 @@ int CookieFile::cookie_close(File *f) { return cookie_file->ops.close(cookie_file->cookie); } -int CookieFile::cookie_flush(File *) { return 0; } - } // anonymous namespace LLVM_LIBC_FUNCTION(::FILE *, fopencookie, diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp index c8cb1e3daea26..1446f66a01598 100644 --- a/libc/test/src/__support/File/file_test.cpp +++ b/libc/test/src/__support/File/file_test.cpp @@ -33,13 +33,12 @@ class StringFile : public File { size_t len); static ErrorOr str_seek(__llvm_libc::File *f, long offset, int whence); static int str_close(__llvm_libc::File *f) { return 0; } - static int str_flush(__llvm_libc::File *f) { return 0; } public: explicit StringFile(char *buffer, size_t buflen, int bufmode, bool owned, ModeFlags modeflags) : __llvm_libc::File(&str_write, &str_read, &str_seek, &str_close, - &str_flush, &__llvm_libc::cleanup_file, + &__llvm_libc::cleanup_file, reinterpret_cast(buffer), buflen, bufmode, owned, modeflags), pos(0), eof_marker(0), write_append(false) { diff --git a/libc/test/src/math/tanhf_test.cpp b/libc/test/src/math/tanhf_test.cpp index f4a4b72dd162a..07afe14a263d0 100644 --- a/libc/test/src/math/tanhf_test.cpp +++ b/libc/test/src/math/tanhf_test.cpp @@ -43,35 +43,31 @@ TEST(LlvmLibcTanhfTest, SpecialNumbers) { } TEST(LlvmLibcTanhfTest, InFloatRange) { - constexpr uint32_t COUNT = 100'000; + constexpr uint32_t COUNT = 100'001; constexpr uint32_t STEP = UINT32_MAX / COUNT; for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::Operation::Tanh, x, __llvm_libc::tanhf(x), 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, + __llvm_libc::tanhf(x), 0.5); } } -// For small values, tanh(x) is x. -TEST(LlvmLibcTanhfTest, SmallValues) { - float x = float(FPBits(uint32_t(0x17800000))); - float result = __llvm_libc::tanhf(x); - EXPECT_MPFR_MATCH(mpfr::Operation::Tanh, x, result, 0.5); - EXPECT_FP_EQ(x, result); - - x = float(FPBits(uint32_t(0x00400000))); - result = __llvm_libc::tanhf(x); - EXPECT_MPFR_MATCH(mpfr::Operation::Tanh, x, result, 0.5); - EXPECT_FP_EQ(x, result); -} - TEST(LlvmLibcTanhfTest, ExceptionalValues) { - float x = float(FPBits(uint32_t(0x3a12'85ffU))); - EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, - __llvm_libc::tanhf(x), 0.5); - - x = -float(FPBits(uint32_t(0x3a12'85ffU))); - EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, - __llvm_libc::tanhf(x), 0.5); + constexpr int N = 4; + constexpr uint32_t INPUTS[N] = { + 0x0040'0000, + 0x1780'0000, + 0x3a12'85ff, + 0x4058'e0a3, + }; + + for (int i = 0; i < N; ++i) { + float x = float(FPBits(INPUTS[i])); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, + __llvm_libc::tanhf(x), 0.5); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, -x, + __llvm_libc::tanhf(-x), 0.5); + } } diff --git a/libc/test/src/stdio/ftell_test.cpp b/libc/test/src/stdio/ftell_test.cpp index e5e4cc4aba4f1..fec881db4d888 100644 --- a/libc/test/src/stdio/ftell_test.cpp +++ b/libc/test/src/stdio/ftell_test.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/stdio/fclose.h" -#include "src/stdio/fflush.h" #include "src/stdio/fopen.h" #include "src/stdio/fread.h" #include "src/stdio/fseek.h" diff --git a/libcxx/benchmarks/CartesianBenchmarks.h b/libcxx/benchmarks/CartesianBenchmarks.h index 2eea156819330..eca4e15cd009b 100644 --- a/libcxx/benchmarks/CartesianBenchmarks.h +++ b/libcxx/benchmarks/CartesianBenchmarks.h @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// - #include #include #include @@ -22,7 +21,7 @@ struct EnumValue : std::integral_constant(I)> { static std::string name() { return std::string("_") + D::Names[I]; } }; -template +template constexpr auto makeEnumValueTuple(std::index_sequence) { return std::make_tuple(EnumValue{}...); } @@ -41,8 +40,7 @@ void makeBenchmarkFromValuesImpl(const Args& A, std::index_sequence) { for (auto& V : A) { B Bench{std::get(V)...}; if (!internal::skip(Bench, 0)) { - benchmark::RegisterBenchmark(Bench.name().c_str(), - [=](benchmark::State& S) { Bench.run(S); }); + benchmark::RegisterBenchmark(Bench.name().c_str(), [=](benchmark::State& S) { Bench.run(S); }); } } } @@ -57,10 +55,8 @@ void makeBenchmarkImpl(const Args& A, std::tuple t) { makeBenchmarkFromValues >(A); } -template