ar_deep_hash.erl - Arweave Deep Hash Algorithm
Overview
Purpose: Deterministic deep hashing of nested data structures
Module: ar_deep_hash
Hash Function: SHA-384
Use Cases: Transaction signing, data verification, content addressing
Deep hash is a cryptographic hash algorithm that recursively hashes nested lists and binary data structures. It provides a deterministic way to hash complex data structures while preserving structural information through tagged hashing.
Dependencies
- Erlang/OTP:
crypto - External: None
Public Functions Overview
%% Deep Hashing
-spec hash(List) -> Hash
when
List :: list(),
Hash :: binary().Public Functions
1. hash/1
-spec hash(List) -> Hash
when
List :: list(),
Hash :: binary().Description: Compute deep hash of a list. The algorithm recursively hashes nested structures with size-tagged prefixes to ensure structural integrity. List elements can be binaries or nested lists.
Algorithm:- For Binaries (within lists): Hash as
SHA384(SHA384("blob" + size) + SHA384(data)) - For Lists: Hash as recursive accumulation with
"list" + lengthtag
Output: 48-byte (384-bit) SHA-384 hash
Test Code:-module(ar_deep_hash_hash_test).
-include_lib("eunit/include/eunit.hrl").
hash_empty_list_test() ->
Hash = ar_deep_hash:hash([]),
?assert(is_binary(Hash)),
?assertEqual(48, byte_size(Hash)).
hash_single_element_list_test() ->
Data = [<<"Element">>],
Hash = ar_deep_hash:hash(Data),
?assert(is_binary(Hash)),
?assertEqual(48, byte_size(Hash)).
hash_single_binary_wrapped_test() ->
Hash = ar_deep_hash:hash([<<"Hello, Arweave!">>]),
?assert(is_binary(Hash)),
?assertEqual(48, byte_size(Hash)).
hash_empty_binary_wrapped_test() ->
Hash = ar_deep_hash:hash([<<>>]),
?assert(is_binary(Hash)),
?assertEqual(48, byte_size(Hash)).
hash_deterministic_test() ->
Data = [<<"Test data">>],
Hash1 = ar_deep_hash:hash(Data),
Hash2 = ar_deep_hash:hash(Data),
?assertEqual(Hash1, Hash2).
hash_different_data_test() ->
Hash1 = ar_deep_hash:hash([<<"Data1">>]),
Hash2 = ar_deep_hash:hash([<<"Data2">>]),
?assertNotEqual(Hash1, Hash2).
hash_multiple_elements_test() ->
Data = [<<"First">>, <<"Second">>, <<"Third">>],
Hash = ar_deep_hash:hash(Data),
?assertEqual(48, byte_size(Hash)).
hash_nested_list_test() ->
Data = [<<"Outer">>, [<<"Inner1">>, <<"Inner2">>], <<"Final">>],
Hash = ar_deep_hash:hash(Data),
?assertEqual(48, byte_size(Hash)).
hash_deep_nesting_test() ->
Data = [
<<"Level1">>,
[
<<"Level2">>,
[
<<"Level3">>,
[<<"Level4">>]
]
]
],
Hash = ar_deep_hash:hash(Data),
?assertEqual(48, byte_size(Hash)).
hash_order_matters_test() ->
List1 = [<<"A">>, <<"B">>, <<"C">>],
List2 = [<<"C">>, <<"B">>, <<"A">>],
Hash1 = ar_deep_hash:hash(List1),
Hash2 = ar_deep_hash:hash(List2),
?assertNotEqual(Hash1, Hash2).
hash_structure_matters_test() ->
% Flat list vs nested list with same content
Flat = [<<"A">>, <<"B">>, <<"C">>],
Nested = [<<"A">>, [<<"B">>, <<"C">>]],
HashFlat = ar_deep_hash:hash(Flat),
HashNested = ar_deep_hash:hash(Nested),
?assertNotEqual(HashFlat, HashNested).
hash_size_prefix_test() ->
% Same content but different structure should hash differently
Single = [<<"AB">>],
Double = [<<"A">>, <<"B">>],
HashSingle = ar_deep_hash:hash(Single),
HashDouble = ar_deep_hash:hash(Double),
?assertNotEqual(HashSingle, HashDouble).
hash_large_data_test() ->
LargeData = [binary:copy(<<"X">>, 1000000)], % 1MB wrapped in list
Hash = ar_deep_hash:hash(LargeData),
?assertEqual(48, byte_size(Hash)).
hash_binary_list_test() ->
Data = [
<<"String 1">>,
<<"String 2">>,
<<"String 3">>,
<<"String 4">>
],
Hash = ar_deep_hash:hash(Data),
?assertEqual(48, byte_size(Hash)).
hash_transaction_like_test() ->
% Simulating transaction signature data
TxData = [
<<"2">>, % Format
crypto:strong_rand_bytes(512), % Owner
<<>>, % Target
<<"0">>, % Quantity
<<"1000000">>, % Reward
<<>>, % Anchor
<<"0">>, % Data size
<<>> % Data root
],
Hash = ar_deep_hash:hash(TxData),
?assertEqual(48, byte_size(Hash)).Algorithm Details
Binary Hashing (Internal)
For a binary B within a list:
Tag = "blob" + byte_size(B)
Hash = SHA384(SHA384(Tag) + SHA384(B))Data = <<"Hello">>,
Tag = <<"blob5">>, % "blob" + byte_size(<<"Hello">>)
Hash = crypto:hash(sha384, <<
(crypto:hash(sha384, Tag))/binary,
(crypto:hash(sha384, Data))/binary
>>).List Hashing
For a list [H1, H2, ..., Hn]:
Tag = "list" + length(List)
Acc0 = SHA384(Tag)
Acc1 = SHA384(Acc0 + DeepHash(H1))
Acc2 = SHA384(Acc1 + DeepHash(H2))
...
AccN = SHA384(AccN-1 + DeepHash(Hn))
Result = AccNList = [<<"A">>, <<"B">>],
Tag = <<"list2">>, % "list" + length(List)
Acc0 = crypto:hash(sha384, Tag),
Acc1 = crypto:hash(sha384, <<Acc0/binary, (hash_bin_or_list(<<"A">>))/binary>>),
Acc2 = crypto:hash(sha384, <<Acc1/binary, (hash_bin_or_list(<<"B">>))/binary>>),
Result = Acc2.Properties
Determinism
The same input always produces the same hash:
Hash1 = ar_deep_hash:hash(Data),
Hash2 = ar_deep_hash:hash(Data),
Hash1 =:= Hash2. % Always trueStructure Preservation
Different structures with same content hash differently:
Flat = [<<"A">>, <<"B">>, <<"C">>],
Nested = [<<"A">>, [<<"B">>, <<"C">>]],
ar_deep_hash:hash(Flat) =/= ar_deep_hash:hash(Nested). % TrueOrder Sensitivity
Element order affects the hash:
List1 = [<<"A">>, <<"B">>],
List2 = [<<"B">>, <<"A">>],
ar_deep_hash:hash(List1) =/= ar_deep_hash:hash(List2). % TrueSize Awareness
Content size is embedded in the hash:
List1 = [<<"AB">>],
List2 = [<<"A">>],
ar_deep_hash:hash(List1) =/= ar_deep_hash:hash(List2). % TrueCommon Patterns
%% Hash a single binary (must wrap in list)
Data = <<"Transaction data">>,
Hash = ar_deep_hash:hash([Data]).
%% Hash transaction signature data (from ar_tx.erl)
SignatureData = [
integer_to_binary(Format),
Owner,
Target,
list_to_binary(integer_to_list(Quantity)),
list_to_binary(integer_to_list(Reward)),
Anchor,
integer_to_binary(DataSize),
DataRoot
],
TxHash = ar_deep_hash:hash(SignatureData).
%% Hash nested structure
NestedData = [
<<"Header">>,
[
<<"Field1">>, <<"Value1">>,
<<"Field2">>, <<"Value2">>
],
<<"Footer">>
],
StructHash = ar_deep_hash:hash(NestedData).
%% Hash for content addressing
Content = <<"File content">>,
ContentID = ar_deep_hash:hash([Content]).
%% Verify data integrity
OriginalHash = ar_deep_hash:hash([Data]),
% ... store or transmit data ...
ReceivedHash = ar_deep_hash:hash([ReceivedData]),
Valid = (OriginalHash =:= ReceivedHash).
%% Hash empty structures
EmptyList = ar_deep_hash:hash([]).
EmptyBinaryInList = ar_deep_hash:hash([<<>>]).Use Cases in Arweave
1. Transaction Signing
% From ar_tx.erl - signature_data_segment/1
SignatureData = [
integer_to_binary(TX#tx.format),
TX#tx.owner,
TX#tx.target,
list_to_binary(integer_to_list(TX#tx.quantity)),
list_to_binary(integer_to_list(TX#tx.reward)),
TX#tx.anchor,
integer_to_binary(TX#tx.data_size),
TX#tx.data_root
],
Hash = ar_deep_hash:hash(SignatureData),
Signature = ar_wallet:sign(PrivKey, Hash).2. Data Item Verification (ANS-104)
% Verify data item signature data
SignatureData = [
<<"dataitem">>,
<<"1">>, % Version
SignatureType,
Target,
Anchor,
Tags,
Data
],
Hash = ar_deep_hash:hash(SignatureData).3. Merkle Tree Construction
% Hash merkle tree nodes
LeafHash = ar_deep_hash:hash([<<"leaf">>, Data]),
NodeHash = ar_deep_hash:hash([<<"node">>, LeftHash, RightHash]).4. Content Addressing
% Create content-addressed identifier
ContentHash = ar_deep_hash:hash([FileData]),
% Use as unique, deterministic IDSecurity Properties
Collision Resistance
SHA-384 provides strong collision resistance:
- Finding two inputs with the same hash is computationally infeasible
- 384-bit output space (2^384 possible hashes)
Preimage Resistance
Given a hash, finding the original input is computationally infeasible.
Second Preimage Resistance
Given an input, finding a different input with the same hash is computationally infeasible.
Tagged Hashing
The "blob" and "list" tags prevent:
- Length extension attacks
- Structural ambiguity attacks
- Cross-protocol hash reuse
Implementation Notes
Tagged Prefixes
% Binary tag includes size
Tag = <<"blob", (integer_to_binary(byte_size(Bin)))/binary>>
% Example: <<"blob5">> for 5-byte binary
% List tag includes length
Tag = <<"list", (integer_to_binary(length(List)))/binary>>
% Example: <<"list3">> for 3-element listRecursive Structure
The algorithm recursively processes nested structures:
hash_bin_or_list(Bin) when is_binary(Bin) -> ...;
hash_bin_or_list(List) when is_list(List) -> ...Each element is hashed independently, then combined.
Hash Size
All hashes are 48 bytes (384 bits):
Hash = crypto:hash(sha384, Data),
48 = byte_size(Hash).Comparison with Other Hash Algorithms
vs. Simple SHA-384
% Simple hash loses structure
Simple = crypto:hash(sha384, <<<<"A">>/binary, <<"B">>/binary>>),
% Deep hash preserves structure
Deep1 = ar_deep_hash:hash([<<"A">>, <<"B">>]),
Deep2 = ar_deep_hash:hash([<<"AB">>]),
Deep1 =/= Deep2. % True - structure preservedvs. Merkle Trees
% Deep hash is like a Merkle tree with tagged nodes
% But it's a single hash function, not a tree structurevs. JSON Hashing
% Deep hash doesn't require serialization
% Operates directly on Erlang data structures
% More efficient and deterministic than JSON → hashPerformance Considerations
Complexity
- Single binary in list: O(1) hash operations (constant)
- List of N elements: O(N) hash operations (linear)
- Nested structures: O(total_elements) across all levels
Memory
- Tail-recursive list processing
- Constant memory overhead per recursion level
- No additional data structure allocation
Benchmarks
% Single binary in list (16 bytes): ~1-2 μs
% Large binary in list (1 MB): ~5-10 ms
% List (100 elements): ~200-300 μs
% Nested list (10 levels, 10 each): ~1-2 msTest Vectors
%% Empty list
ar_deep_hash:hash([]) =
SHA384("list0")
%% Single binary
ar_deep_hash:hash([<<"test">>]) =
Tag = SHA384("list1"),
BlobHash = SHA384(SHA384("blob4") + SHA384("test")),
SHA384(Tag + BlobHash)
%% Two binaries
ar_deep_hash:hash([<<"a">>, <<"b">>]) =
Tag = SHA384("list2"),
Acc1 = SHA384(Tag + DeepHash(<<"a">>)),
Acc2 = SHA384(Acc1 + DeepHash(<<"b">>))
%% Nested structure
ar_deep_hash:hash([<<"x">>, [<<"y">>]]) =
OuterTag = SHA384("list2"),
InnerList = SHA384("list1"),
InnerComplete = SHA384(InnerList + DeepHash(<<"y">>)),
Acc1 = SHA384(OuterTag + DeepHash(<<"x">>)),
Final = SHA384(Acc1 + InnerComplete)Error Handling
The module assumes valid input:
- Lists containing binaries or other lists
- No atoms, numbers, or other Erlang terms directly
% These will crash:
ar_deep_hash:hash(<<"binary">>). % Must be wrapped in list
ar_deep_hash:hash(123). % Not a list
ar_deep_hash:hash([atom]). % Atoms not supported
ar_deep_hash:hash([1, 2, 3]). % Numbers not supportedReferences
- Arweave Yellow Paper - Deep hash specification
- SHA-384 - NIST FIPS 180-4
- Merkle Trees - Hash tree structures
- ar_tx.erl - Uses deep hash for transaction signing
- ar_bundles.erl - Uses deep hash for data items
Notes
- Input Type: The public
hash/1function only accepts lists; wrap binaries in a list - Output Size: Always 48 bytes (384 bits) regardless of input
- Deterministic: Same input always produces same hash
- Structure Sensitive: Different structures hash differently even with same content
- Tagged: Uses "blob" and "list" prefixes to prevent ambiguity
- Recursive: Handles arbitrary nesting depth
- Order Dependent: Element order affects the hash
- Size Embedded: Element/list size is part of the hash
- No Serialization: Works directly on Erlang data structures
- Performance: Linear in number of elements, efficient for nested structures