Skip to content

gnd: Support multiple subgraphs, grafting, subgraph composition in dev mode #6000

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jun 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
ec4b36d
graph: Add clone_for_deployment to FileLinkResolver to create FileLi…
incrypto32 May 12, 2025
4af0e6d
graph: Add for_deployment to LinkResolverTrait
incrypto32 May 11, 2025
b1e4a0c
core, graph: use for_deployment to get properly scoped resolver
incrypto32 May 12, 2025
b7dcdca
graph: Implement aliases for file link resolver
incrypto32 May 12, 2025
4263ebc
node: Make gnd work with multiple subgraphs
incrypto32 May 12, 2025
1616bf8
node: Support subgraph datasource in gnd
incrypto32 May 12, 2025
3915686
node: correct the default value for manifest
incrypto32 May 12, 2025
32b7163
core, node, graph: Ignore graft base in dev mode
incrypto32 May 12, 2025
317b381
node: Allow providing a postgres url for gnd
incrypto32 May 13, 2025
fdedb23
node: Do not use pgtemp in windows
incrypto32 May 13, 2025
9a90dff
store: enable `vendored` feature for openssl crate
incrypto32 May 13, 2025
3012224
chain/ethereum: Return error when ipc is used in non unix platform
incrypto32 May 13, 2025
72e0da4
node: Refactor launcher
incrypto32 May 16, 2025
8e51bf2
node/dev : Better error message when database directory doesn't exist
incrypto32 May 16, 2025
747f7a9
node: refactor watcher
incrypto32 May 16, 2025
2eccf06
core, node, graph: Manipulate raw manifest instead of passing
incrypto32 May 22, 2025
d99396f
node: Correct comments on `redeploy_all_subgraphs`
incrypto32 May 22, 2025
80e7139
node/gnd: Deploy all subgraphs first before watching files
incrypto32 May 27, 2025
b349d60
core, graph : Refactor LinkResolver trait
incrypto32 May 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions chain/ethereum/src/transport.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ impl Transport {
.expect("Failed to connect to Ethereum IPC")
}

/// Stand-in for the IPC transport constructor on non-Unix targets.
///
/// This variant is only compiled when `cfg(not(unix))` holds, and it never
/// produces a transport.
///
/// # Panics
///
/// The body is a bare `panic!`; because this is an `async fn`, the panic is
/// raised when the returned future is awaited, not when the function is called.
#[cfg(not(unix))]
pub async fn new_ipc(_ipc: &str) -> Self {
panic!("IPC connections are not supported on non-Unix platforms")
}

/// Creates a WebSocket transport.
pub async fn new_ws(ws: &str) -> Self {
ws::WebSocket::new(ws)
Expand Down
4 changes: 4 additions & 0 deletions chain/substreams/src/data_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,10 @@ mod test {
unimplemented!()
}

// Stub for the manifest-scoping method (presumably required by the `LinkResolver`
// trait this test type implements). It panics via `unimplemented!()` if it is
// ever called.
fn for_manifest(&self, _manifest_path: &str) -> Result<Box<dyn LinkResolver>, Error> {
unimplemented!()
}

async fn cat(&self, _logger: &Logger, _link: &Link) -> Result<Vec<u8>, Error> {
Ok(gen_package().encode_to_vec())
}
Expand Down
7 changes: 6 additions & 1 deletion core/src/subgraph/instance_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,12 @@ impl<S: SubgraphStore> SubgraphInstanceManager<S> {
let manifest = UnresolvedSubgraphManifest::parse(deployment.hash.cheap_clone(), manifest)?;

// Allow for infinite retries for subgraph definition files.
let link_resolver = Arc::from(self.link_resolver.with_retries());
let link_resolver = Arc::from(
self.link_resolver
.for_manifest(&deployment.hash.to_string())
.map_err(SubgraphRegistrarError::Unknown)?
.with_retries(),
);

// Make sure the `raw_yaml` is present on both this subgraph and the graft base.
self.subgraph_store
Expand Down
6 changes: 5 additions & 1 deletion core/src/subgraph/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,12 @@ impl<I: SubgraphInstanceManager> SubgraphAssignmentProviderTrait for SubgraphAss
));
}

let file_bytes = self
let link_resolver = self
.link_resolver
.for_manifest(&loc.hash.to_string())
.map_err(SubgraphAssignmentProviderError::ResolveError)?;

let file_bytes = link_resolver
.cat(&logger, &loc.hash.to_ipfs_link())
.await
.map_err(SubgraphAssignmentProviderError::ResolveError)?;
Expand Down
52 changes: 34 additions & 18 deletions core/src/subgraph/registrar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ where
start_block_override: Option<BlockPtr>,
graft_block_override: Option<BlockPtr>,
history_blocks: Option<i32>,
ignore_graft_base: bool,
) -> Result<DeploymentLocator, SubgraphRegistrarError> {
// We don't have a location for the subgraph yet; that will be
// assigned when we deploy for real. For logging purposes, make up a
Expand All @@ -286,19 +287,33 @@ where
.logger_factory
.subgraph_logger(&DeploymentLocator::new(DeploymentId(0), hash.clone()));

let raw: serde_yaml::Mapping = {
let file_bytes = self
.resolver
.cat(&logger, &hash.to_ipfs_link())
.await
.map_err(|e| {
SubgraphRegistrarError::ResolveError(
SubgraphManifestResolveError::ResolveError(e),
)
})?;

serde_yaml::from_slice(&file_bytes)
.map_err(|e| SubgraphRegistrarError::ResolveError(e.into()))?
let resolver: Arc<dyn LinkResolver> = Arc::from(
self.resolver
.for_manifest(&hash.to_string())
.map_err(SubgraphRegistrarError::Unknown)?,
);

let raw = {
let mut raw: serde_yaml::Mapping = {
let file_bytes =
resolver
.cat(&logger, &hash.to_ipfs_link())
.await
.map_err(|e| {
SubgraphRegistrarError::ResolveError(
SubgraphManifestResolveError::ResolveError(e),
)
})?;

serde_yaml::from_slice(&file_bytes)
.map_err(|e| SubgraphRegistrarError::ResolveError(e.into()))?
};

if ignore_graft_base {
raw.remove("graft");
}

raw
};

let kind = BlockchainKind::from_manifest(&raw).map_err(|e| {
Expand All @@ -323,7 +338,7 @@ where
node_id,
debug_fork,
self.version_switching_mode,
&self.resolver,
&resolver,
history_blocks,
)
.await?
Expand All @@ -341,7 +356,7 @@ where
node_id,
debug_fork,
self.version_switching_mode,
&self.resolver,
&resolver,
history_blocks,
)
.await?
Expand All @@ -359,7 +374,7 @@ where
node_id,
debug_fork,
self.version_switching_mode,
&self.resolver,
&resolver,
history_blocks,
)
.await?
Expand All @@ -377,7 +392,7 @@ where
node_id,
debug_fork,
self.version_switching_mode,
&self.resolver,
&resolver,
history_blocks,
)
.await?
Expand Down Expand Up @@ -567,10 +582,11 @@ async fn create_subgraph_version<C: Blockchain, S: SubgraphStore>(
history_blocks_override: Option<i32>,
) -> Result<DeploymentLocator, SubgraphRegistrarError> {
let raw_string = serde_yaml::to_string(&raw).unwrap();

let unvalidated = UnvalidatedSubgraphManifest::<C>::resolve(
deployment.clone(),
raw,
resolver,
&resolver,
logger,
ENV_VARS.max_spec_version.clone(),
)
Expand Down
125 changes: 122 additions & 3 deletions graph/src/components/link_resolver/file.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::Duration;

Expand All @@ -12,16 +13,29 @@ use crate::prelude::{Error, JsonValueStream, LinkResolver as LinkResolverTrait};
pub struct FileLinkResolver {
// Directory that links are joined onto when resolving them; when `None`, links
// are used as given.
base_dir: Option<PathBuf>,
// NOTE(review): presumably the time limit applied to file reads; its use is not
// visible in this part of the file, so confirm before relying on it.
timeout: Duration,
// Maps an alias name to the path of the file the alias stands for. Alias
// lookups are checked before any `base_dir` handling.
aliases: HashMap<String, PathBuf>,
}

impl Default for FileLinkResolver {
/// Builds a resolver with no base directory, a 30-second timeout and an
/// empty alias map.
fn default() -> Self {
Self {
base_dir: None,
timeout: Duration::from_secs(30),
aliases: HashMap::new(),
}
}
}

impl FileLinkResolver {
/// Create a new FileLinkResolver
///
/// All paths are treated as absolute paths.
pub fn new() -> Self {
pub fn new(base_dir: Option<PathBuf>, aliases: HashMap<String, PathBuf>) -> Self {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be clearer if aliases was a HashMap<DeploymentHash, PathBuf> which is really what it expresses: where in the filesystem one would find the files for a given deployment.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it a bit strange to do that, because aliases is a mapping from the String aliases that the user provided as CLI arguments. Yes, they do get converted into DeploymentHash, but with the recent change to

fn for_manifest(&self, manifest_path: &str) from fn for_deployment(&self, deployment: DeploymentHash)

Also, in the cat method we would need to convert the Link back to a DeploymentHash again, which is a bit weird, since the Link was created by calling to_ipfs_link on DeploymentHash. In cat we use this to resolve aliases properly.

Self {
base_dir: None,
base_dir: base_dir,
timeout: Duration::from_secs(30),
aliases,
}
}

Expand All @@ -33,18 +47,59 @@ impl FileLinkResolver {
Self {
base_dir: Some(base_dir.as_ref().to_owned()),
timeout: Duration::from_secs(30),
aliases: HashMap::new(),
}
}

fn resolve_path(&self, link: &str) -> PathBuf {
let path = Path::new(link);

// If the path is an alias, use the aliased path
if let Some(aliased) = self.aliases.get(link) {
return aliased.clone();
}

// Return the path as is if base_dir is None, or join with base_dir if present.
// if "link" is an absolute path, join will simply return that path.
self.base_dir
.as_ref()
.map_or_else(|| path.to_owned(), |base_dir| base_dir.join(link))
}

/// This method creates a new resolver that is scoped to a specific subgraph
/// It will set the base directory to the parent directory of the manifest path
/// This is required because paths mentioned in the subgraph manifest are relative paths
/// and we need a new resolver with the right base directory for the specific subgraph
fn clone_for_manifest(&self, manifest_path_str: &str) -> Result<Self, Error> {
let mut resolver = self.clone();

// Create a path to the manifest based on the current resolver's
// base directory or default to using the deployment string as path
// If the deployment string is an alias, use the aliased path
let manifest_path = if let Some(aliased) = self.aliases.get(&manifest_path_str.to_string())
{
aliased.clone()
} else {
match &resolver.base_dir {
Some(dir) => dir.join(&manifest_path_str),
None => PathBuf::from(manifest_path_str),
}
};

let canonical_manifest_path = manifest_path
.canonicalize()
.map_err(|e| Error::from(anyhow!("Failed to canonicalize manifest path: {}", e)))?;

// The manifest path is the path of the subgraph manifest file in the build directory
// We use the parent directory as the base directory for the new resolver
let base_dir = canonical_manifest_path
.parent()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused now, isn't this basically base_dir/deployment_str/.., i.e. base_dir ?

Copy link
Member Author

@incrypto32 incrypto32 May 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deployment_str can be something like "../subgraph2/subgraph.yaml"; in that case the new base_dir is the parent of "base_dir/../subgraph2/subgraph.yaml", which is "../subgraph2".

When deployment_str is an absolute path, it's simply the directory in which the subgraph.yaml is located.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't deployment_str just the deployment hash because of the line let deployment_str = deployment.to_string(); ?

.ok_or_else(|| Error::from(anyhow!("Manifest path has no parent directory")))?
.to_path_buf();

resolver.base_dir = Some(base_dir);
Ok(resolver)
}
}

pub fn remove_prefix(link: &str) -> &str {
Expand Down Expand Up @@ -86,6 +141,10 @@ impl LinkResolverTrait for FileLinkResolver {
}
}

// Delegates to `clone_for_manifest` and boxes the resulting scoped resolver as a
// trait object. Any error from `clone_for_manifest` is propagated unchanged.
fn for_manifest(&self, manifest_path: &str) -> Result<Box<dyn LinkResolverTrait>, Error> {
Ok(Box::new(self.clone_for_manifest(manifest_path)?))
}

async fn get_block(&self, _logger: &Logger, _link: &Link) -> Result<Vec<u8>, Error> {
Err(anyhow!("get_block is not implemented for FileLinkResolver").into())
}
Expand Down Expand Up @@ -117,7 +176,7 @@ mod tests {
file.write_all(test_content).unwrap();

// Create a resolver without a base directory
let resolver = FileLinkResolver::new();
let resolver = FileLinkResolver::default();
let logger = slog::Logger::root(slog::Discard, slog::o!());

// Test valid path resolution
Expand Down Expand Up @@ -185,4 +244,64 @@ mod tests {
let _ = fs::remove_file(test_file_path);
let _ = fs::remove_dir(temp_dir);
}

// Checks that aliases are honoured both when reading a file through `cat` and
// when deriving a manifest-scoped resolver through `clone_for_manifest`.
#[tokio::test]
async fn test_file_resolver_with_aliases() {
// Create a temporary directory for test files
let temp_dir = env::temp_dir().join("file_resolver_test_aliases");
// The result is ignored here; if the directory could not be created, the
// `File::create(..).unwrap()` calls below will fail the test instead.
let _ = fs::create_dir_all(&temp_dir);

// Create two test files with different content
let test_file1_path = temp_dir.join("file.txt");
let test_content1 = b"This is the file content";
let mut file1 = fs::File::create(&test_file1_path).unwrap();
file1.write_all(test_content1).unwrap();

let test_file2_path = temp_dir.join("another_file.txt");
let test_content2 = b"This is another file content";
let mut file2 = fs::File::create(&test_file2_path).unwrap();
file2.write_all(test_content2).unwrap();

// Create aliases mapping
let mut aliases = HashMap::new();
aliases.insert("alias1".to_string(), test_file1_path.clone());
aliases.insert("alias2".to_string(), test_file2_path.clone());
aliases.insert("deployment-id".to_string(), test_file1_path.clone());

// Create resolver with aliases
let resolver = FileLinkResolver::new(Some(temp_dir.clone()), aliases);
let logger = slog::Logger::root(slog::Discard, slog::o!());

// Test resolving by aliases
let link1 = Link {
link: "alias1".to_string(),
};
let result1 = resolver.cat(&logger, &link1).await.unwrap();
assert_eq!(result1, test_content1);

let link2 = Link {
link: "alias2".to_string(),
};
let result2 = resolver.cat(&logger, &link2).await.unwrap();
assert_eq!(result2, test_content2);

// Test that the alias is honoured by clone_for_manifest as well: the scoped
// resolver's base directory must be the parent directory of the aliased file
let deployment_resolver = resolver.clone_for_manifest("deployment-id").unwrap();

let expected_dir = test_file1_path.parent().unwrap();
let deployment_base_dir = deployment_resolver.base_dir.clone().unwrap();

// Canonicalize both sides so the comparison does not depend on how the
// temporary directory path happens to be spelled
let canonical_expected_dir = expected_dir.canonicalize().unwrap();
let canonical_deployment_dir = deployment_base_dir.canonicalize().unwrap();

assert_eq!(
canonical_deployment_dir, canonical_expected_dir,
"Build directory paths don't match"
);

// Clean up (best effort; failures are ignored)
let _ = fs::remove_file(test_file1_path);
let _ = fs::remove_file(test_file2_path);
let _ = fs::remove_dir(temp_dir);
}
}
4 changes: 4 additions & 0 deletions graph/src/components/link_resolver/ipfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ impl LinkResolverTrait for IpfsResolver {
Box::new(s)
}

// The manifest path is ignored here: the result is simply a boxed `cheap_clone`
// of this resolver, and this implementation never returns an error.
fn for_manifest(&self, _manifest_path: &str) -> Result<Box<dyn LinkResolverTrait>, Error> {
Ok(Box::new(self.cheap_clone()))
}

async fn cat(&self, logger: &Logger, link: &Link) -> Result<Vec<u8>, Error> {
let path = ContentPath::new(&link.link)?;
let timeout = self.timeout;
Expand Down
14 changes: 14 additions & 0 deletions graph/src/components/link_resolver/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@ pub trait LinkResolver: Send + Sync + 'static + Debug {
/// Fetches the IPLD block contents as bytes.
async fn get_block(&self, logger: &Logger, link: &Link) -> Result<Vec<u8>, Error>;

/// Creates a new resolver scoped to a specific subgraph manifest.
///
/// For `FileLinkResolver`, this sets the base directory to the manifest's parent
/// directory, so that relative paths referenced in the manifest (schema, mappings,
/// etc.) resolve correctly. Note that the manifest meant here is the one in the
/// build directory, not the one in the source directory.
/// Other resolvers (IPFS/Arweave) simply return a clone, since they use
/// absolute content identifiers.
///
/// The `manifest_path` parameter can be a filesystem path or an alias. Aliases are used
/// in development environments (via `gnd --sources`) to map user-defined
/// aliases to actual subgraph paths, enabling local development with file-based
/// subgraphs that reference each other.
///
/// # Errors
///
/// Implementations may fail; `FileLinkResolver`, for example, returns an error
/// when the manifest path cannot be canonicalized or has no parent directory.
fn for_manifest(&self, manifest_path: &str) -> Result<Box<dyn LinkResolver>, Error>;

/// Read the contents of `link` and deserialize them into a stream of JSON
/// values. The values must each be on a single line; newlines are significant
/// as they are used to split the file contents and each line is deserialized
Expand Down
Loading
Loading