Add ICU-backed fallbacks for String <-> Data encoding conversion.

FoundationEssentials gains two `dynamic package` hooks that return nil by default;
FoundationInternationalization replaces them with implementations built on ICU's
UConverter (see the test for swiftlang/swift-foundation#1016).

Review notes on this revision of the patch:
  * ICU.StringConverter.encode(string:allowLossyConversion:) no longer leaks its
    output buffer when converter setup or ucnv_fromUChars fails.
  * Generic arguments that had been stripped from the patch text are restored.
    `MemoryLayout<CChar>` and `LockedState<OpaquePointer>` are reconstructed from
    context -- TODO confirm against the original change.
  * Leading whitespace was reconstructed; context lines may need
    `git apply --ignore-whitespace`.
  * The `index` line of the new ICU+StringConverter.swift is omitted because the
    file contents changed.

diff --git a/Sources/FoundationEssentials/String/String+IO.swift b/Sources/FoundationEssentials/String/String+IO.swift
index 55f8e19ae..6fe6d2542 100644
--- a/Sources/FoundationEssentials/String/String+IO.swift
+++ b/Sources/FoundationEssentials/String/String+IO.swift
@@ -24,6 +24,11 @@ dynamic public func _cfMakeStringFromBytes(_ bytes: UnsafeBufferPointer<UInt8>,
     // Provide swift-corelibs-foundation with an entry point to convert some bytes into a String
     return nil
 }
+
+dynamic package func _icuMakeStringFromBytes(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
+    // Concrete implementation is provided by FoundationInternationalization.
+    return nil
+}
 #endif
 
 @available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
@@ -202,8 +207,14 @@ extension String {
                 return nil
             }
             #else
-            if let string = (bytes.withContiguousStorageIfAvailable({ _cfMakeStringFromBytes($0, encoding: encoding.rawValue) }) ??
-                             Array(bytes).withUnsafeBufferPointer({ _cfMakeStringFromBytes($0, encoding: encoding.rawValue) })) {
+            func makeString(from bytes: UnsafeBufferPointer<UInt8>) -> String? {
+                return (
+                    _cfMakeStringFromBytes(bytes, encoding: encoding.rawValue) ??
+                    _icuMakeStringFromBytes(bytes, encoding: encoding)
+                )
+            }
+            if let string = (bytes.withContiguousStorageIfAvailable({ makeString(from: $0) }) ??
+                             Array(bytes).withUnsafeBufferPointer({ makeString(from: $0) })) {
                 self = string
             } else {
                 return nil
diff --git a/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift b/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift
index 4c99c1599..ee277c586 100644
--- a/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift
+++ b/Sources/FoundationEssentials/String/StringProtocol+Essentials.swift
@@ -91,6 +91,11 @@ dynamic public func _cfStringEncodingConvert(string: String, using encoding: UIn
     // Dynamically replaced by swift-corelibs-foundation to implement encodings that we do not have Swift replacements for, yet
     return nil
 }
+
+dynamic package func _icuStringEncodingConvert(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
+    // Concrete implementation is provided by FoundationInternationalization.
+    return nil
+}
 #endif
 
 @available(FoundationPreview 0.4, *)
@@ -255,8 +260,12 @@ extension String {
             // Other encodings, defer to the CoreFoundation implementation
             return _ns.data(using: encoding.rawValue, allowLossyConversion: allowLossyConversion)
 #else
-            // Attempt an up-call into swift-corelibs-foundation, which can defer to the CoreFoundation implementation
-            return _cfStringEncodingConvert(string: self, using: encoding.rawValue, allowLossyConversion: allowLossyConversion)
+            return (
+                // Attempt an up-call into swift-corelibs-foundation, which can defer to the CoreFoundation implementation
+                _cfStringEncodingConvert(string: self, using: encoding.rawValue, allowLossyConversion: allowLossyConversion) ??
+                // Or attempt an up-call into ICU via FoundationInternationalization
+                _icuStringEncodingConvert(string: self, using: encoding, allowLossyConversion: allowLossyConversion)
+            )
 #endif
         }
     }
diff --git a/Sources/FoundationInternationalization/ICU/ICU+StringConverter.swift b/Sources/FoundationInternationalization/ICU/ICU+StringConverter.swift
new file mode 100644
--- /dev/null
+++ b/Sources/FoundationInternationalization/ICU/ICU+StringConverter.swift
@@ -0,0 +1,216 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+#if canImport(FoundationEssentials)
+import FoundationEssentials
+#endif
+internal import _FoundationICU
+
+private extension String.Encoding {
+    /// The ICU converter name for this encoding, or `nil` when the encoding is not listed here.
+    var _icuConverterName: String? {
+        // TODO: Replace this with forthcoming(?) public property such as https://github.com/swiftlang/swift-foundation/pull/1243
+        switch self {
+        case .utf8: "UTF-8"
+        case .ascii: "US-ASCII"
+        case .japaneseEUC: "EUC-JP"
+        case .isoLatin1: "ISO-8859-1"
+        case .shiftJIS: "Shift_JIS"
+        case .isoLatin2: "ISO-8859-2"
+        case .unicode: "UTF-16"
+        case .windowsCP1251: "windows-1251"
+        case .windowsCP1252: "windows-1252"
+        case .windowsCP1253: "windows-1253"
+        case .windowsCP1254: "windows-1254"
+        case .windowsCP1250: "windows-1250"
+        case .iso2022JP: "ISO-2022-JP"
+        case .macOSRoman: "macintosh"
+        case .utf16BigEndian: "UTF-16BE"
+        case .utf16LittleEndian: "UTF-16LE"
+        case .utf32: "UTF-32"
+        case .utf32BigEndian: "UTF-32BE"
+        case .utf32LittleEndian: "UTF-32LE"
+        default: nil
+        }
+    }
+}
+
+extension ICU {
+    /// Wraps an ICU `UConverter` for a single `String.Encoding`; the handle is only touched under the lock.
+    final class StringConverter: @unchecked Sendable {
+        private let _converter: LockedState<OpaquePointer> // UConverter*
+
+        let encoding: String.Encoding
+
+        /// Fails when the encoding has no ICU converter name or `ucnv_open` does not succeed.
+        init?(encoding: String.Encoding) {
+            guard let convName = encoding._icuConverterName else {
+                return nil
+            }
+            var status: UErrorCode = U_ZERO_ERROR
+            guard let converter = ucnv_open(convName, &status), status.isSuccess else {
+                return nil
+            }
+            self._converter = LockedState(initialState: converter)
+            self.encoding = encoding
+        }
+
+        deinit {
+            _converter.withLock { ucnv_close($0) }
+        }
+    }
+}
+
+extension ICU.StringConverter {
+    /// Decodes `data` with this converter; the to-Unicode state is reset on exit so the cached converter can be reused.
+    func decode(data: Data) -> String? {
+        return _converter.withLock { converter in
+            defer {
+                ucnv_resetToUnicode(converter)
+            }
+
+            let srcLength = CInt(data.count)
+            let initCapacity = srcLength * CInt(ucnv_getMinCharSize(converter)) + 1
+            return _withResizingUCharBuffer(initialSize: initCapacity) { (dest, capacity, status) in
+                return data.withUnsafeBytes { src in
+                    ucnv_toUChars(
+                        converter,
+                        dest,
+                        capacity,
+                        src.baseAddress,
+                        srcLength,
+                        &status
+                    )
+                }
+            }
+        }
+    }
+
+    /// Encodes `string` with this converter.
+    ///
+    /// When `lossy` is true, unconvertible characters are substituted (0xFF for `.ascii`, 0x3F otherwise);
+    /// otherwise conversion stops at the first such character and `nil` is returned.
+    func encode(string: String, allowLossyConversion lossy: Bool) -> Data? {
+        return _converter.withLock { (converter) -> Data? in
+            defer {
+                ucnv_resetFromUnicode(converter)
+            }
+
+            let utf16Rep = string.utf16
+            let uchars = UnsafeMutableBufferPointer<UChar>.allocate(capacity: utf16Rep.count)
+            _ = uchars.initialize(fromContentsOf: utf16Rep)
+            defer {
+                uchars.deallocate()
+            }
+
+            var status: UErrorCode = U_ZERO_ERROR
+            if lossy {
+                var lossyChar: UChar = encoding == .ascii ? 0xFF : 0x3F
+                ucnv_setSubstString(
+                    converter,
+                    &lossyChar,
+                    1,
+                    &status
+                )
+                guard status.isSuccess else { return nil }
+
+                ucnv_setFromUCallBack(
+                    converter,
+                    UCNV_FROM_U_CALLBACK_SUBSTITUTE,
+                    nil, // newContext
+                    nil, // oldAction
+                    nil, // oldContext
+                    &status
+                )
+                guard status.isSuccess else { return nil }
+            } else {
+                ucnv_setFromUCallBack(
+                    converter,
+                    UCNV_FROM_U_CALLBACK_STOP,
+                    nil, // newContext
+                    nil, // oldAction
+                    nil, // oldContext
+                    &status
+                )
+                guard status.isSuccess else { return nil }
+            }
+
+            // Allocate the output only after every early exit above, so that a failed setup cannot leak it.
+            let srcLength = uchars.count
+            let capacity = srcLength * Int(ucnv_getMaxCharSize(converter)) + 1
+            let dest = UnsafeMutableRawPointer.allocate(
+                byteCount: capacity,
+                alignment: MemoryLayout<CChar>.alignment
+            )
+
+            let actualLength = ucnv_fromUChars(
+                converter,
+                dest,
+                CInt(capacity),
+                uchars.baseAddress,
+                CInt(srcLength),
+                &status
+            )
+            guard status.isSuccess else {
+                // `dest` has not been handed to `Data` yet, so it must be released here.
+                dest.deallocate()
+                return nil
+            }
+            return Data(
+                bytesNoCopy: dest,
+                count: Int(actualLength),
+                deallocator: .custom({ pointer, _ in pointer.deallocate() })
+            )
+        }
+    }
+}
+
+extension ICU.StringConverter {
+    nonisolated(unsafe) static private var _converters: LockedState<[String.Encoding: ICU.StringConverter]> = .init(initialState: [:])
+
+    /// Returns the cached converter for `encoding`, creating and caching one on first use.
+    static func converter(for encoding: String.Encoding) -> ICU.StringConverter? {
+        return _converters.withLock {
+            if let converter = $0[encoding] {
+                return converter
+            }
+            if let converter = ICU.StringConverter(encoding: encoding) {
+                $0[encoding] = converter
+                return converter
+            }
+            return nil
+        }
+    }
+}
+
+
+@_dynamicReplacement(for: _icuMakeStringFromBytes(_:encoding:))
+func _icuMakeStringFromBytes_impl(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
+    guard let converter = ICU.StringConverter.converter(for: encoding),
+          let pointer = bytes.baseAddress else {
+        return nil
+    }
+    let data = Data(
+        bytesNoCopy: UnsafeMutableRawPointer(mutating: pointer),
+        count: bytes.count,
+        deallocator: .none
+    )
+    return converter.decode(data: data)
+}
+
+@_dynamicReplacement(for: _icuStringEncodingConvert(string:using:allowLossyConversion:))
+func _icuStringEncodingConvert_impl(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
+    guard let converter = ICU.StringConverter.converter(for: encoding) else {
+        return nil
+    }
+    return converter.encode(string: string, allowLossyConversion: allowLossyConversion)
+}
diff --git a/Tests/FoundationInternationalizationTests/StringTests+Data.swift b/Tests/FoundationInternationalizationTests/StringTests+Data.swift
new file mode 100644
index 000000000..7c0babc26
--- /dev/null
+++ b/Tests/FoundationInternationalizationTests/StringTests+Data.swift
@@ -0,0 +1,126 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+#if FOUNDATION_FRAMEWORK
+@testable import Foundation
+#else
+@testable import FoundationEssentials
+@testable import FoundationInternationalization
+#endif // FOUNDATION_FRAMEWORK
+
+#if canImport(TestSupport)
+import TestSupport
+#endif
+
+final class StringConverterTests: XCTestCase {
+    private func _test_roundTripConversion(
+        string: String,
+        data: Data,
+        encoding: String._Encoding,
+        file: StaticString = #filePath,
+        line: UInt = #line
+    ) {
+        XCTAssertEqual(
+            string.data(using: encoding), data, "Failed to convert string to data.",
+            file: file, line: line
+        )
+        XCTAssertEqual(
+            string, String(data: data, encoding: encoding), "Failed to convert data to string.",
+            file: file, line: line
+        )
+    }
+
+    func test_japaneseEUC() {
+        // Confirm that https://github.com/swiftlang/swift-foundation/issues/1016 is fixed.
+
+        // ASCII
+        _test_roundTripConversion(
+            string: "ABC",
+            data: Data([0x41, 0x42, 0x43]),
+            encoding: .japaneseEUC
+        )
+
+        // Plane 1 Row 1
+        _test_roundTripConversion(
+            string: "、。◇",
+            data: Data([
+                0xA1, 0xA2,
+                0xA1, 0xA3,
+                0xA1, 0xFE,
+            ]),
+            encoding: .japaneseEUC
+        )
+
+        // Plane 1 Row 4 (Hiragana)
+        _test_roundTripConversion(
+            string: "ひらがな",
+            data: Data([
+                0xA4, 0xD2,
+                0xA4, 0xE9,
+                0xA4, 0xAC,
+                0xA4, 0xCA,
+            ]),
+            encoding: .japaneseEUC
+        )
+
+        // Plane 1 Row 5 (Katakana)
+        _test_roundTripConversion(
+            string: "ヴヵヶ",
+            data: Data([
+                0xA5, 0xF4,
+                0xA5, 0xF5,
+                0xA5, 0xF6,
+            ]),
+            encoding: .japaneseEUC
+        )
+
+        // Plane 1 Row 6 (Greek Alphabets)
+        _test_roundTripConversion(
+            string: "Σπ",
+            data: Data([
+                0xA6, 0xB2,
+                0xA6, 0xD0,
+            ]),
+            encoding: .japaneseEUC
+        )
+
+        // Basic Kanji
+        _test_roundTripConversion(
+            string: "日本",
+            data: Data([
+                0xC6, 0xFC,
+                0xCB, 0xDC,
+            ]),
+            encoding: .japaneseEUC
+        )
+
+        // Amendment by JIS83/JIS90
+        _test_roundTripConversion(
+            string: "扉⇔穴",
+            data: Data([
+                0xC8, 0xE2,
+                0xA2, 0xCE,
+                0xB7, 0xEA,
+            ]),
+            encoding: .japaneseEUC
+        )
+
+        // Unsupported characters
+        let sushi = "Sushi🍣"
+        XCTAssertNil(sushi.data(using: String._Encoding.japaneseEUC))
+        XCTAssertEqual(
+            sushi.data(using: String._Encoding.japaneseEUC, allowLossyConversion: true),
+            "Sushi?".data(using: .utf8)
+        )
+    }
+}
+