Skip to content

[Do not merge] Unicode Normalization APIs #75298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions stdlib/public/Concurrency/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ set(SWIFT_RUNTIME_CONCURRENCY_SWIFT_SOURCES
ContinuousClock.swift
SuspendingClock.swift
TaskSleepDuration.swift
Unicode+NormalizedScalarsAsync.swift
)

add_swift_target_library(swift_Concurrency ${SWIFT_STDLIB_LIBRARY_BUILD_TYPES} IS_STDLIB
Expand Down
324 changes: 324 additions & 0 deletions stdlib/public/Concurrency/Unicode+NormalizedScalarsAsync.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2023 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import Swift

@available(SwiftStdlib 9999, *)
extension AsyncSequence where Element == Unicode.Scalar {

  /// A wrapper around this asynchronous sequence of Unicode scalars through
  /// which normalized forms of its contents can be requested.
  ///
  /// The wrapper is created directly from `self`. The `nfd`, `nfc`, `nfkd`
  /// and `nfkc` properties declared on `Unicode.NormalizedScalars` in this
  /// file each return an asynchronous sequence of scalars in the
  /// corresponding normalization form.
  ///
  @inlinable
  public var normalized: Unicode.NormalizedScalars<Self> {
    get {
      return Unicode.NormalizedScalars<Self>(self)
    }
  }
}

@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// An asynchronous sequence of the source's scalars in
  /// Normalization Form D.
  ///
  /// Normalization to NFD preserves canonical equivalence.
  ///
  @inlinable
  public var nfd: AsyncNFD {
    return AsyncNFD(source: source)
  }

  /// An asynchronous sequence which yields the scalars of a source
  /// asynchronous sequence in Normalization Form D.
  ///
  /// Normalization to NFD preserves canonical equivalence.
  ///
  @frozen
  public struct AsyncNFD: AsyncSequence {

    /// The asynchronous sequence whose scalars are normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Starts a new iteration of `source` and wraps it in a
    /// normalizing iterator.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator and passes them through an
    /// `Unicode.NFDNormalizer`, one scalar at a time.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      @usableFromInline
      internal var normalizer = Unicode.NFDNormalizer()

      // A scalar handed to the normalizer on an earlier call that was still
      // left in the one-element feed when the normalizer produced output.
      // It is offered again before the source is asked for more.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar, or `nil` once the source is
      /// exhausted and the normalizer's `flush()` has nothing more to give.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Take the carried-over scalar if there is one; only otherwise
          // await the source. (`??` cannot be used here because its
          // right-hand side would need to be `async`.)
          let input: Unicode.Scalar
          if let carried = pending.take() {
            input = carried
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            // Nothing buffered and the source returned `nil`.
            break
          }

          // Offer exactly one scalar to the normalizer.
          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            // No output yet; supply another scalar.
            continue
          }
          // Keep whatever the normalizer left unread for the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}

@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// An asynchronous sequence of the source's scalars in
  /// Normalization Form C.
  ///
  /// Normalization to NFC preserves canonical equivalence.
  ///
  @inlinable
  public var nfc: AsyncNFC {
    return AsyncNFC(source: source)
  }

  /// An asynchronous sequence which yields the scalars of a source
  /// asynchronous sequence in Normalization Form C.
  ///
  /// Normalization to NFC preserves canonical equivalence.
  ///
  @frozen
  public struct AsyncNFC: AsyncSequence {

    /// The asynchronous sequence whose scalars are normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Starts a new iteration of `source` and wraps it in a
    /// normalizing iterator.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator and passes them through an
    /// `Unicode.NFCNormalizer`, one scalar at a time.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      @usableFromInline
      internal var normalizer = Unicode.NFCNormalizer()

      // A scalar handed to the normalizer on an earlier call that was still
      // left in the one-element feed when the normalizer produced output.
      // It is offered again before the source is asked for more.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar, or `nil` once the source is
      /// exhausted and the normalizer's `flush()` has nothing more to give.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Take the carried-over scalar if there is one; only otherwise
          // await the source. (`??` cannot be used here because its
          // right-hand side would need to be `async`.)
          let input: Unicode.Scalar
          if let carried = pending.take() {
            input = carried
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            // Nothing buffered and the source returned `nil`.
            break
          }

          // Offer exactly one scalar to the normalizer.
          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            // No output yet; supply another scalar.
            continue
          }
          // Keep whatever the normalizer left unread for the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}

@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// An asynchronous sequence of the source's scalars in
  /// Normalization Form KD.
  ///
  /// Normalization to NFKD does _not_ preserve canonical equivalence.
  ///
  @inlinable
  public var nfkd: AsyncNFKD {
    return AsyncNFKD(source: source)
  }

  /// An asynchronous sequence which yields the scalars of a source
  /// asynchronous sequence in Normalization Form KD.
  ///
  /// Normalization to NFKD does _not_ preserve canonical equivalence.
  ///
  @frozen
  public struct AsyncNFKD: AsyncSequence {

    /// The asynchronous sequence whose scalars are normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Starts a new iteration of `source` and wraps it in a
    /// normalizing iterator.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator and passes them through an
    /// `Unicode.NFKDNormalizer`, one scalar at a time.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      @usableFromInline
      internal var normalizer = Unicode.NFKDNormalizer()

      // A scalar handed to the normalizer on an earlier call that was still
      // left in the one-element feed when the normalizer produced output.
      // It is offered again before the source is asked for more.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar, or `nil` once the source is
      /// exhausted and the normalizer's `flush()` has nothing more to give.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Take the carried-over scalar if there is one; only otherwise
          // await the source. (`??` cannot be used here because its
          // right-hand side would need to be `async`.)
          let input: Unicode.Scalar
          if let carried = pending.take() {
            input = carried
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            // Nothing buffered and the source returned `nil`.
            break
          }

          // Offer exactly one scalar to the normalizer.
          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            // No output yet; supply another scalar.
            continue
          }
          // Keep whatever the normalizer left unread for the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}

@available(SwiftStdlib 9999, *)
extension Unicode.NormalizedScalars
where Source: AsyncSequence, Source.Element == Unicode.Scalar {

  /// An asynchronous sequence of the source's scalars in
  /// Normalization Form KC.
  ///
  /// Normalization to NFKC does _not_ preserve canonical equivalence.
  ///
  @inlinable
  public var nfkc: AsyncNFKC {
    return AsyncNFKC(source: source)
  }

  /// An asynchronous sequence which yields the scalars of a source
  /// asynchronous sequence in Normalization Form KC.
  ///
  /// Normalization to NFKC does _not_ preserve canonical equivalence.
  ///
  @frozen
  public struct AsyncNFKC: AsyncSequence {

    /// The asynchronous sequence whose scalars are normalized.
    public var source: Source

    @inlinable
    internal init(source: Source) {
      self.source = source
    }

    /// Starts a new iteration of `source` and wraps it in a
    /// normalizing iterator.
    @inlinable
    public func makeAsyncIterator() -> AsyncIterator {
      let upstream = source.makeAsyncIterator()
      return AsyncIterator(source: upstream)
    }

    /// Pulls scalars from the source iterator and passes them through an
    /// `Unicode.NFKCNormalizer`, one scalar at a time.
    @frozen
    public struct AsyncIterator: AsyncIteratorProtocol {

      public typealias Element = Unicode.Scalar
      public typealias Failure = Source.Failure

      /// The iterator supplying un-normalized scalars.
      public var source: Source.AsyncIterator

      @usableFromInline
      internal var normalizer = Unicode.NFKCNormalizer()

      // A scalar handed to the normalizer on an earlier call that was still
      // left in the one-element feed when the normalizer produced output.
      // It is offered again before the source is asked for more.
      @usableFromInline
      internal var pending: Unicode.Scalar? = nil

      @inlinable
      internal init(source: Source.AsyncIterator) {
        self.source = source
      }

      /// Returns the next normalized scalar, or `nil` once the source is
      /// exhausted and the normalizer's `flush()` has nothing more to give.
      ///
      /// - Parameter actor: The isolation forwarded to the source
      ///   iterator's `next(isolation:)`.
      /// - Throws: Any error thrown by the source iterator.
      @inlinable
      public mutating func next(
        isolation actor: isolated (any Actor)?
      ) async throws(Source.Failure) -> Unicode.Scalar? {

        while true {
          // Take the carried-over scalar if there is one; only otherwise
          // await the source. (`??` cannot be used here because its
          // right-hand side would need to be `async`.)
          let input: Unicode.Scalar
          if let carried = pending.take() {
            input = carried
          } else if let fetched = try await source.next(isolation: actor) {
            input = fetched
          } else {
            // Nothing buffered and the source returned `nil`.
            break
          }

          // Offer exactly one scalar to the normalizer.
          var feed = CollectionOfOne(input).makeIterator()
          guard let output = normalizer.resume(consuming: &feed) else {
            // No output yet; supply another scalar.
            continue
          }
          // Keep whatever the normalizer left unread for the next call.
          pending = feed.next()
          return output
        }
        return normalizer.flush()
      }
    }
  }
}
9 changes: 9 additions & 0 deletions stdlib/public/SwiftShims/swift/shims/UnicodeData.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,21 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar);

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint16_t _swift_stdlib_getCompatibilityNormData(__swift_uint32_t scalar);

SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t * const _swift_stdlib_nfd_decompositions;

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint32_t _swift_stdlib_getDecompositionEntry(__swift_uint32_t scalar);

SWIFT_RUNTIME_STDLIB_INTERNAL
const __swift_uint8_t * const _swift_stdlib_nfkd_decompositions;

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint32_t _swift_stdlib_getCompatibilityDecompositionEntry(__swift_uint32_t scalar);

SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
__swift_uint32_t y);
Expand Down
4 changes: 4 additions & 0 deletions stdlib/public/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ split_embedded_sources(
NORMAL NewtypeWrapper.swift
EMBEDDED NFC.swift
EMBEDDED NFD.swift
EMBEDDED NFKC.swift
EMBEDDED NFKD.swift
EMBEDDED ObjectIdentifier.swift
EMBEDDED Optional.swift
EMBEDDED OptionSet.swift
Expand Down Expand Up @@ -193,6 +195,8 @@ split_embedded_sources(
NORMAL ThreadLocalStorage.swift
EMBEDDED UIntBuffer.swift
EMBEDDED UnavailableStringAPIs.swift
EMBEDDED Unicode+NormalizedScalars.swift
EMBEDDED UnicodeNormalizationCheck.swift
EMBEDDED UnicodeData.swift
EMBEDDED UnicodeEncoding.swift
EMBEDDED UnicodeBreakProperty.swift
Expand Down
Loading