Skip to content

Commit 29db3a5

Browse files
authored
Implement a File Link Resolver (#5981)
* graph: Add a new FIleLinkResolver * graph: remove `/ipfs/` prefix when using file link resolver * graph: Implement custom deserialise logic for Link to enable file link resolver * tests: Add runner test that uses file link resolver * graph: Conditionally disable deployment hash validation based on env var * graph: use constant for "/ipfs/" prefix in `remove_prefix` * graph: Simplify resolve_path by removing redundant path.is_absolute() check * graph: Remove leftover println from file_resolver tests * tests: Refactor runner tests extract test utils into recipe.rs * tests: Add a test for file_link_resolver
1 parent 8e002b6 commit 29db3a5

File tree

16 files changed

+590
-230
lines changed

16 files changed

+590
-230
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,13 @@ jobs:
107107
command: test
108108
args: --package graph-tests --test runner_tests
109109

110+
- name: Run file link resolver test
111+
id: file-link-resolver-test
112+
uses: actions-rs/cargo@v1
113+
with:
114+
command: test
115+
args: --package graph-tests --test file_link_resolver
116+
110117
integration-tests:
111118
name: Run integration tests
112119
runs-on: ubuntu-latest
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
use std::path::{Path, PathBuf};
2+
use std::time::Duration;
3+
4+
use anyhow::anyhow;
5+
use async_trait::async_trait;
6+
use slog::Logger;
7+
8+
use crate::data::subgraph::Link;
9+
use crate::prelude::{Error, JsonValueStream, LinkResolver as LinkResolverTrait};
10+
11+
#[derive(Clone, Debug)]
12+
pub struct FileLinkResolver {
13+
base_dir: Option<PathBuf>,
14+
timeout: Duration,
15+
}
16+
17+
impl FileLinkResolver {
18+
/// Create a new FileLinkResolver
19+
///
20+
/// All paths are treated as absolute paths.
21+
pub fn new() -> Self {
22+
Self {
23+
base_dir: None,
24+
timeout: Duration::from_secs(30),
25+
}
26+
}
27+
28+
/// Create a new FileLinkResolver with a base directory
29+
///
30+
/// All paths that are not absolute will be considered
31+
/// relative to this base directory.
32+
pub fn with_base_dir<P: AsRef<Path>>(base_dir: P) -> Self {
33+
Self {
34+
base_dir: Some(base_dir.as_ref().to_owned()),
35+
timeout: Duration::from_secs(30),
36+
}
37+
}
38+
39+
fn resolve_path(&self, link: &str) -> PathBuf {
40+
let path = Path::new(link);
41+
42+
// Return the path as is if base_dir is None, or join with base_dir if present.
43+
// if "link" is an absolute path, join will simply return that path.
44+
self.base_dir
45+
.as_ref()
46+
.map_or_else(|| path.to_owned(), |base_dir| base_dir.join(link))
47+
}
48+
}
49+
50+
pub fn remove_prefix(link: &str) -> &str {
51+
const IPFS: &str = "/ipfs/";
52+
if link.starts_with(IPFS) {
53+
&link[IPFS.len()..]
54+
} else {
55+
link
56+
}
57+
}
58+
59+
#[async_trait]
60+
impl LinkResolverTrait for FileLinkResolver {
61+
fn with_timeout(&self, timeout: Duration) -> Box<dyn LinkResolverTrait> {
62+
let mut resolver = self.clone();
63+
resolver.timeout = timeout;
64+
Box::new(resolver)
65+
}
66+
67+
fn with_retries(&self) -> Box<dyn LinkResolverTrait> {
68+
Box::new(self.clone())
69+
}
70+
71+
async fn cat(&self, logger: &Logger, link: &Link) -> Result<Vec<u8>, Error> {
72+
let link = remove_prefix(&link.link);
73+
let path = self.resolve_path(&link);
74+
75+
slog::debug!(logger, "File resolver: reading file";
76+
"path" => path.to_string_lossy().to_string());
77+
78+
match tokio::fs::read(&path).await {
79+
Ok(data) => Ok(data),
80+
Err(e) => {
81+
slog::error!(logger, "Failed to read file";
82+
"path" => path.to_string_lossy().to_string(),
83+
"error" => e.to_string());
84+
Err(anyhow!("Failed to read file {}: {}", path.display(), e).into())
85+
}
86+
}
87+
}
88+
89+
async fn get_block(&self, _logger: &Logger, _link: &Link) -> Result<Vec<u8>, Error> {
90+
Err(anyhow!("get_block is not implemented for FileLinkResolver").into())
91+
}
92+
93+
async fn json_stream(&self, _logger: &Logger, _link: &Link) -> Result<JsonValueStream, Error> {
94+
Err(anyhow!("json_stream is not implemented for FileLinkResolver").into())
95+
}
96+
}
97+
98+
#[cfg(test)]
99+
mod tests {
100+
use super::*;
101+
use std::env;
102+
use std::fs;
103+
use std::io::Write;
104+
105+
#[tokio::test]
106+
async fn test_file_resolver_absolute() {
107+
// Test the resolver without a base directory (absolute paths only)
108+
109+
// Create a temporary directory for test files
110+
let temp_dir = env::temp_dir().join("file_resolver_test");
111+
let _ = fs::create_dir_all(&temp_dir);
112+
113+
// Create a test file in the temp directory
114+
let test_file_path = temp_dir.join("test.txt");
115+
let test_content = b"Hello, world!";
116+
let mut file = fs::File::create(&test_file_path).unwrap();
117+
file.write_all(test_content).unwrap();
118+
119+
// Create a resolver without a base directory
120+
let resolver = FileLinkResolver::new();
121+
let logger = slog::Logger::root(slog::Discard, slog::o!());
122+
123+
// Test valid path resolution
124+
let link = Link {
125+
link: test_file_path.to_string_lossy().to_string(),
126+
};
127+
let result = resolver.cat(&logger, &link).await.unwrap();
128+
assert_eq!(result, test_content);
129+
130+
// Test path with leading slash that likely doesn't exist
131+
let link = Link {
132+
link: "/test.txt".to_string(),
133+
};
134+
let result = resolver.cat(&logger, &link).await;
135+
assert!(
136+
result.is_err(),
137+
"Reading /test.txt should fail as it doesn't exist"
138+
);
139+
140+
// Clean up
141+
let _ = fs::remove_file(test_file_path);
142+
let _ = fs::remove_dir(temp_dir);
143+
}
144+
145+
#[tokio::test]
146+
async fn test_file_resolver_with_base_dir() {
147+
// Test the resolver with a base directory
148+
149+
// Create a temporary directory for test files
150+
let temp_dir = env::temp_dir().join("file_resolver_test_base_dir");
151+
let _ = fs::create_dir_all(&temp_dir);
152+
153+
// Create a test file in the temp directory
154+
let test_file_path = temp_dir.join("test.txt");
155+
let test_content = b"Hello from base dir!";
156+
let mut file = fs::File::create(&test_file_path).unwrap();
157+
file.write_all(test_content).unwrap();
158+
159+
// Create a resolver with a base directory
160+
let resolver = FileLinkResolver::with_base_dir(&temp_dir);
161+
let logger = slog::Logger::root(slog::Discard, slog::o!());
162+
163+
// Test relative path (no leading slash)
164+
let link = Link {
165+
link: "test.txt".to_string(),
166+
};
167+
let result = resolver.cat(&logger, &link).await.unwrap();
168+
assert_eq!(result, test_content);
169+
170+
// Test absolute path
171+
let link = Link {
172+
link: test_file_path.to_string_lossy().to_string(),
173+
};
174+
let result = resolver.cat(&logger, &link).await.unwrap();
175+
assert_eq!(result, test_content);
176+
177+
// Test missing file
178+
let link = Link {
179+
link: "missing.txt".to_string(),
180+
};
181+
let result = resolver.cat(&logger, &link).await;
182+
assert!(result.is_err());
183+
184+
// Clean up
185+
let _ = fs::remove_file(test_file_path);
186+
let _ = fs::remove_dir(temp_dir);
187+
}
188+
}

graph/src/components/link_resolver/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ use crate::prelude::Error;
77
use std::fmt::Debug;
88

99
mod arweave;
10+
mod file;
1011
mod ipfs;
1112

1213
pub use arweave::*;
1314
use async_trait::async_trait;
15+
pub use file::*;
1416
pub use ipfs::*;
1517

1618
/// Resolves links to subgraph manifests and resources referenced by them.

graph/src/data/subgraph/mod.rs

Lines changed: 70 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,20 +116,23 @@ impl DeploymentHash {
116116
pub fn new(s: impl Into<String>) -> Result<Self, String> {
117117
let s = s.into();
118118

119-
// Enforce length limit
120-
if s.len() > 46 {
121-
return Err(s);
122-
}
119+
// When the disable_deployment_hash_validation flag is set, we skip the validation
120+
if !ENV_VARS.disable_deployment_hash_validation {
121+
// Enforce length limit
122+
if s.len() > 46 {
123+
return Err(s);
124+
}
123125

124-
// Check that the ID contains only allowed characters.
125-
if !s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
126-
return Err(s);
127-
}
126+
// Check that the ID contains only allowed characters.
127+
if !s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
128+
return Err(s);
129+
}
128130

129-
// Allow only deployment id's for 'real' subgraphs, not the old
130-
// metadata subgraph.
131-
if s == "subgraphs" {
132-
return Err(s);
131+
// Allow only deployment id's for 'real' subgraphs, not the old
132+
// metadata subgraph.
133+
if s == "subgraphs" {
134+
return Err(s);
135+
}
133136
}
134137

135138
Ok(DeploymentHash(s))
@@ -397,12 +400,65 @@ impl From<HashMap<Word, Value>> for DataSourceContext {
397400
}
398401

399402
/// IPLD link.
400-
#[derive(Clone, Debug, Default, Hash, Eq, PartialEq, Deserialize)]
403+
#[derive(Clone, Debug, Default, Hash, Eq, PartialEq)]
401404
pub struct Link {
402-
#[serde(rename = "/")]
403405
pub link: String,
404406
}
405407

408+
/// Custom deserializer for Link
409+
/// This handles both formats:
410+
/// 1. Simple string: "schema.graphql" or "subgraph.yaml" which is used in [`FileLinkResolver`]
411+
/// FileLinkResolver is used in local development environments
412+
/// 2. IPLD format: { "/": "Qm..." } which is used in [`IpfsLinkResolver`]
413+
impl<'de> de::Deserialize<'de> for Link {
414+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
415+
where
416+
D: de::Deserializer<'de>,
417+
{
418+
struct LinkVisitor;
419+
420+
impl<'de> de::Visitor<'de> for LinkVisitor {
421+
type Value = Link;
422+
423+
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
424+
formatter.write_str("string or map with '/' key")
425+
}
426+
427+
fn visit_str<E>(self, value: &str) -> Result<Link, E>
428+
where
429+
E: de::Error,
430+
{
431+
Ok(Link {
432+
link: value.to_string(),
433+
})
434+
}
435+
436+
fn visit_map<A>(self, mut map: A) -> Result<Link, A::Error>
437+
where
438+
A: de::MapAccess<'de>,
439+
{
440+
let mut link = None;
441+
442+
while let Some(key) = map.next_key::<String>()? {
443+
if key == "/" {
444+
if link.is_some() {
445+
return Err(de::Error::duplicate_field("/"));
446+
}
447+
link = Some(map.next_value()?);
448+
} else {
449+
return Err(de::Error::unknown_field(&key, &["/"]));
450+
}
451+
}
452+
453+
link.map(|l: String| Link { link: l })
454+
.ok_or_else(|| de::Error::missing_field("/"))
455+
}
456+
}
457+
458+
deserializer.deserialize_any(LinkVisitor)
459+
}
460+
}
461+
406462
impl<S: ToString> From<S> for Link {
407463
fn from(s: S) -> Self {
408464
Self {

graph/src/env/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,12 @@ pub struct EnvVars {
225225
/// if no genesis hash can be retrieved from an adapter. If enabled, the adapter is
226226
/// ignored if unable to produce a genesis hash or produces a different an unexpected hash.
227227
pub genesis_validation_enabled: bool,
228+
/// Whether to enforce deployment hash validation rules.
229+
/// When disabled, any string can be used as a deployment hash.
230+
/// When enabled, deployment hashes must meet length and character constraints.
231+
///
232+
/// Set by the flag `GRAPH_NODE_DISABLE_DEPLOYMENT_HASH_VALIDATION`. Enabled by default.
233+
pub disable_deployment_hash_validation: bool,
228234
/// How long do we wait for a response from the provider before considering that it is unavailable.
229235
/// Default is 30s.
230236
pub genesis_validation_timeout: Duration,
@@ -332,6 +338,7 @@ impl EnvVars {
332338
section_map: inner.section_map,
333339
firehose_grpc_max_decode_size_mb: inner.firehose_grpc_max_decode_size_mb,
334340
genesis_validation_enabled: inner.genesis_validation_enabled.0,
341+
disable_deployment_hash_validation: inner.disable_deployment_hash_validation.0,
335342
genesis_validation_timeout: Duration::from_secs(inner.genesis_validation_timeout),
336343
graphman_server_auth_token: inner.graphman_server_auth_token,
337344
firehose_disable_extended_blocks_for_chains:
@@ -528,6 +535,11 @@ struct Inner {
528535
firehose_block_fetch_timeout: u64,
529536
#[envconfig(from = "GRAPH_FIREHOSE_FETCH_BLOCK_BATCH_SIZE", default = "10")]
530537
firehose_block_fetch_batch_size: usize,
538+
#[envconfig(
539+
from = "GRAPH_NODE_DISABLE_DEPLOYMENT_HASH_VALIDATION",
540+
default = "false"
541+
)]
542+
disable_deployment_hash_validation: EnvVarBoolean,
531543
}
532544

533545
#[derive(Clone, Debug)]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"anonymous": false,
4+
"inputs": [
5+
{
6+
"indexed": false,
7+
"internalType": "string",
8+
"name": "testCommand",
9+
"type": "string"
10+
}
11+
],
12+
"name": "TestEvent",
13+
"type": "event"
14+
}
15+
]
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"name": "file-link-resolver",
3+
"version": "0.1.0",
4+
"scripts": {
5+
"codegen": "graph codegen --skip-migrations",
6+
"create:test": "graph create test/file-link-resolver --node $GRAPH_NODE_ADMIN_URI",
7+
"deploy:test": "graph deploy test/file-link-resolver --version-label v0.0.1 --ipfs $IPFS_URI --node $GRAPH_NODE_ADMIN_URI"
8+
},
9+
"devDependencies": {
10+
"@graphprotocol/graph-cli": "0.60.0",
11+
"@graphprotocol/graph-ts": "0.31.0"
12+
}
13+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
type Block @entity {
2+
id: ID!
3+
number: BigInt!
4+
hash: Bytes!
5+
}

0 commit comments

Comments
 (0)