From d213ae0b3e594eeb029751317b10ef088ec57229 Mon Sep 17 00:00:00 2001 From: taylorswift Date: Sat, 12 Nov 2022 15:21:02 -0600 Subject: [PATCH] make Base16 unsafe API slightly safer, add documentation (#4) --- README.md | 24 ++++++- Sources/Base16/Base16.swift | 97 ++++++++++++++++++----------- Sources/Base64/Base64.Input.swift | 16 +++-- Sources/Base64/Base64.swift | 31 +++++++++ Sources/BaseDigits/BaseDigits.swift | 2 + 5 files changed, 125 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index bf28cec..5dd1dbb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@
-***`hash`***
`0.2.3` +***`hash`***
`0.3.0` [![ci status](https://github.com/kelvin13/swift-hash/actions/workflows/build.yml/badge.svg)](https://github.com/kelvin13/swift-hash/actions/workflows/build.yml) [![ci status](https://github.com/kelvin13/swift-hash/actions/workflows/build-devices.yml/badge.svg)](https://github.com/kelvin13/swift-hash/actions/workflows/build-devices.yml) @@ -12,4 +12,24 @@
-*`swift-hash`* is an inline-only microframework providing generic, pure-Swift implementations of the [SHA-2](https://en.wikipedia.org/wiki/SHA-2) and HMAC-SHA-2 hashing functions. +*`swift-hash`* is an inline-only microframework providing generic, pure-Swift implementations of various hashes, checksums, and binary utilities. + +## products + +The package vends the following library products: + +1. [`Base16`](Sources/Base16) + + Tools for encoding to and decoding from base-16 strings. + +1. [`Base64`](Sources/Base64) + + Tools for encoding to and decoding from base-64 strings. + +1. [`CRC`](Sources/CRC) + + Implements [CRC-32](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) checksums. + +1. [`SHA2`](Sources/SHA2) + + Implements the [SHA-256](https://en.wikipedia.org/wiki/SHA-2) and HMAC-SHA-256 hashing functions. \ No newline at end of file diff --git a/Sources/Base16/Base16.swift b/Sources/Base16/Base16.swift index 41c1fec..f059194 100644 --- a/Sources/Base16/Base16.swift +++ b/Sources/Base16/Base16.swift @@ -1,8 +1,17 @@ import BaseDigits +/// A namespace for base-16 utilities. public -enum Base16 +enum Base16 { + /// Decodes some ``String``-like type containing an ASCII-encoded base-16 string + /// to some ``RangeReplaceableCollection`` type. The order of the decoded bytes + /// in the output matches the order of the (pairs of) hexadecimal digits in the + /// input string. + /// + /// Characters (including UTF-8 continuation bytes) that are not base-16 digits + /// will be interpreted as zeros. If the string does not contain an even number + /// of digits, the trailing digit will be ignored. @inlinable public static func decode(_ ascii:ASCII, to _:Bytes.Type = Bytes.self) -> Bytes where Bytes:RangeReplaceableCollection, Bytes.Element == UInt8, @@ -10,6 +19,13 @@ enum Base16 { self.decode(ascii.utf8, to: Bytes.self) } + /// Decodes an ASCII-encoded base-16 string to some ``RangeReplaceableCollection`` type. 
+ /// The order of the decoded bytes in the output matches the order of the (pairs of) + /// hexadecimal digits in the input. + /// + /// Characters (including UTF-8 continuation bytes) that are not base-16 digits + /// will be interpreted as zeros. If the input does not yield an even number of + /// digits, the trailing digit will be ignored. @inlinable public static func decode(_ ascii:ASCII, to _:Bytes.Type = Bytes.self) -> Bytes where Bytes:RangeReplaceableCollection, Bytes.Element == UInt8, @@ -25,7 +41,7 @@ enum Base16 } return bytes } - + /// Encodes a sequence of bytes to a base-16 string with the specified lettercasing. @inlinable public static func encode(_ bytes:Bytes, with _:Digits.Type) -> String where Bytes:Sequence, Bytes.Element == UInt8, Digits:BaseDigits @@ -42,6 +58,12 @@ enum Base16 } extension Base16 { + /// Decodes an ASCII-encoded base-16 string into a pre-allocated buffer, + /// returning [`nil`]() if the input did not yield enough bytes to fill + /// the buffer completely. + /// + /// Characters (including UTF-8 continuation bytes) that are not base-16 digits + /// will be interpreted as zeros. @inlinable public static func decode(_ ascii:ASCII, into bytes:UnsafeMutableRawBufferPointer) -> Void? @@ -62,6 +84,13 @@ extension Base16 } return () } + /// Encodes a sequence of bytes into a pre-allocated buffer as a base-16 + /// string with the specified lettercasing. + /// + /// The size of the `ascii` buffer must be exactly twice the inline size + /// of `words`. If this method is used incorrectly, the output buffer may + /// be incompletely initialized, but it will never write to memory outside + /// of the buffer’s bounds. 
@inlinable public static func encode(storing words:BigEndian, into ascii:UnsafeMutableRawBufferPointer, @@ -72,21 +101,22 @@ extension Base16 { assert(2 * $0.count <= ascii.count) - var offset:Int = ascii.startIndex - for byte:UInt8 in $0 + for (offset, byte):(Int, UInt8) + in zip(stride(from: ascii.startIndex, to: ascii.endIndex - 1, by: 2), $0) { - ascii[offset] = Digits[byte >> 4] - ascii.formIndex(after: &offset) - ascii[offset] = Digits[byte & 0x0f] - ascii.formIndex(after: &offset) + ascii[offset ] = Digits[byte >> 4] + ascii[offset + 1] = Digits[byte & 0x0f] } } } } extension Base16 -{ +{ #if swift(>=5.6) - @inlinable public static + /// Decodes an ASCII-encoded base-16 string to some (usually trivial) type. + /// This is essentially the same as loading values from raw memory, so this + /// method should only be used to load trivial types. + @inlinable public static func decode(_ ascii:ASCII, loading _:BigEndian.Type = BigEndian.self) -> BigEndian? where ASCII:Sequence, ASCII.Element == UInt8 @@ -116,13 +146,19 @@ extension Base16 } #endif - + /// Encodes the raw bytes of the given value to a base-16 string with the + /// specified lettercasing. The bytes with the lowest addresses appear first + /// in the encoded output. + /// + /// This method is slightly faster than calling ``encode(_:with:)`` on an + /// unsafe buffer-pointer view of `words`. 
@inlinable public static func encode(storing words:BigEndian, with _:Digits.Type) -> String where Digits:BaseDigits { let bytes:Int = 2 * MemoryLayout.size + #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) if #available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 14.0, *) { @@ -134,39 +170,26 @@ extension Base16 return bytes } } - else - { - return .init( - decoding: try Self.encode(storing: words, to: [UInt8].self, with: Digits.self), - as: Unicode.UTF8.self) - } - #elseif swift(>=5.4) - return .init(unsafeUninitializedCapacity: bytes) - { - Self.encode(storing: words, - into: UnsafeMutableRawBufferPointer.init($0), - with: Digits.self) - return bytes - } - #else + #endif + + #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) || swift(<5.4) return .init( - decoding: try Self.encode(storing: words, to: [UInt8].self, with: Digits.self), + decoding: [UInt8].init(unsafeUninitializedCapacity: bytes) + { + Self.encode(storing: words, + into: UnsafeMutableRawBufferPointer.init($0), + with: Digits.self) + $1 = bytes + }, as: Unicode.UTF8.self) - #endif - } - - @inlinable public static - func encode(storing words:BigEndian, to _:[UInt8].Type, - with _:Digits.Type) -> [UInt8] - where Digits:BaseDigits - { - let bytes:Int = 2 * MemoryLayout.size + #else return .init(unsafeUninitializedCapacity: bytes) { Self.encode(storing: words, into: UnsafeMutableRawBufferPointer.init($0), with: Digits.self) - $1 = bytes + return bytes } + #endif } } diff --git a/Sources/Base64/Base64.Input.swift b/Sources/Base64/Base64.Input.swift index b387c81..54d8e75 100644 --- a/Sources/Base64/Base64.Input.swift +++ b/Sources/Base64/Base64.Input.swift @@ -1,17 +1,21 @@ extension Base64 { - /// An abstraction over text input, which discards ASCII whitespace - /// characters. + /// An abstraction over text input, which discards the ASCII whitespace + /// characters [`'\t'`](), [`'\n'`](), [`'\f'`](), [`'\r'`](), and [`' '`](). 
+ /// + /// Iteration over an instance of this type will halt upon encountering the + /// first [`'='`]() padding character, even if the underlying sequence contains + /// more characters. @frozen public - struct Input where UTF8:Sequence, UTF8.Element == UInt8 + struct Input where ASCII:Sequence, ASCII.Element == UInt8 { public - var iterator:UTF8.Iterator + var iterator:ASCII.Iterator @inlinable public - init(_ utf8:UTF8) + init(_ ascii:ASCII) { - self.iterator = utf8.makeIterator() + self.iterator = ascii.makeIterator() } } } diff --git a/Sources/Base64/Base64.swift b/Sources/Base64/Base64.swift index a95588f..5cbc098 100644 --- a/Sources/Base64/Base64.swift +++ b/Sources/Base64/Base64.swift @@ -1,8 +1,26 @@ import BaseDigits +/// A namespace for base-64 utilities. +/// +/// The interface is superficially similar to that of the ``/Base16`` module, +/// but the decoding methods are slightly more lenient in their inputs, as +/// they ignore whitespace and newlines. public enum Base64 { + /// Decodes some ``String``-like type containing an ASCII-encoded base-64 string + /// to some ``RangeReplaceableCollection`` type, skipping over any ASCII + /// whitespace characters. Padding is not required. + /// + /// Characters (including UTF-8 continuation bytes) that are neither base-64 digits + /// nor ASCII whitespace characters will be interpreted as zeros. + /// + /// See ``Base64/Input`` for a list of recognized ASCII whitespace characters. + /// + /// > Important: + /// Unicode whitespace characters, such as non-breaking spaces, will *not* + /// be skipped, and their constituent UTF-8 code units will be interpreted + /// as zeros. 
@inlinable public static func decode(_ ascii:ASCII, to _:Bytes.Type = Bytes.self) -> Bytes where Bytes:RangeReplaceableCollection, Bytes.Element == UInt8, @@ -10,6 +28,18 @@ enum Base64 { self.decode(ascii.utf8, to: Bytes.self) } + /// Decodes an ASCII-encoded base-64 string to some ``RangeReplaceableCollection`` type, + /// skipping over any ASCII whitespace characters. Padding is not required. + /// + /// Characters (including UTF-8 continuation bytes) that are neither base-64 digits + /// nor ASCII whitespace characters will be interpreted as zeros. + /// + /// See ``Base64/Input`` for a list of recognized ASCII whitespace characters. + /// + /// > Important: + /// Unicode whitespace characters, such as non-breaking spaces, will *not* + /// be skipped, and their constituent UTF-8 code units will be interpreted + /// as zeros. @inlinable public static func decode(_ ascii:ASCII, to _:Bytes.Type = Bytes.self) -> Bytes where Bytes:RangeReplaceableCollection, Bytes.Element == UInt8, @@ -43,6 +73,7 @@ enum Base64 return bytes } + /// Encodes a sequence of bytes to a base-64 string with padding if needed. @inlinable public static func encode(_ bytes:Bytes) -> String where Bytes:Sequence, Bytes.Element == UInt8 { diff --git a/Sources/BaseDigits/BaseDigits.swift b/Sources/BaseDigits/BaseDigits.swift index a582009..b924a6f 100644 --- a/Sources/BaseDigits/BaseDigits.swift +++ b/Sources/BaseDigits/BaseDigits.swift @@ -7,11 +7,13 @@ protocol BaseDigits } extension BaseDigits { + /// Gets the ASCII value for the given remainder as a ``Unicode/Scalar``. @inlinable public static subscript(remainder:UInt8, as _:Unicode.Scalar.Type = Unicode.Scalar.self) -> Unicode.Scalar { .init(Self[remainder]) } + /// Gets the ASCII value for the given remainder as a ``Character``. @inlinable public static subscript(remainder:UInt8, as _:Character.Type = Character.self) -> Character {