Skip to content

Commit 1642eaf

Browse files
authored
store: Add more debug logs when subgraph is marked unhealthy (#5662)
1 parent fbb4589 commit 1642eaf

File tree

3 files changed: +36 -6 lines changed

store/postgres/src/deployment.rs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@ use diesel::{
1414
sql_types::{Nullable, Text},
1515
};
1616
use graph::{
17-
blockchain::block_stream::FirehoseCursor, data::subgraph::schema::SubgraphError, env::ENV_VARS,
17+
blockchain::block_stream::FirehoseCursor,
18+
data::subgraph::schema::SubgraphError,
19+
env::ENV_VARS,
1820
schema::EntityType,
21+
slog::{debug, Logger},
1922
};
2023
use graph::{
2124
data::store::scalar::ToPrimitive,
@@ -890,16 +893,24 @@ pub fn update_deployment_status(
890893
/// is healthy as of that block; errors are inserted according to the
891894
/// `block_ptr` they contain
892895
pub(crate) fn insert_subgraph_errors(
896+
logger: &Logger,
893897
conn: &mut PgConnection,
894898
id: &DeploymentHash,
895899
deterministic_errors: &[SubgraphError],
896900
latest_block: BlockNumber,
897901
) -> Result<(), StoreError> {
902+
debug!(
903+
logger,
904+
"Inserting deterministic errors to the db";
905+
"subgraph" => id.to_string(),
906+
"errors" => deterministic_errors.len()
907+
);
908+
898909
for error in deterministic_errors {
899910
insert_subgraph_error(conn, error)?;
900911
}
901912

902-
check_health(conn, id, latest_block)
913+
check_health(logger, conn, id, latest_block)
903914
}
904915

905916
#[cfg(debug_assertions)]
@@ -918,6 +929,7 @@ pub(crate) fn error_count(
918929
/// Checks if the subgraph is healthy or unhealthy as of the given block, or the subgraph latest
919930
/// block if `None`, based on the presence of deterministic errors. Has no effect on failed subgraphs.
920931
fn check_health(
932+
logger: &Logger,
921933
conn: &mut PgConnection,
922934
id: &DeploymentHash,
923935
block: BlockNumber,
@@ -927,7 +939,15 @@ fn check_health(
927939
let has_errors = has_deterministic_errors(conn, id, block)?;
928940

929941
let (new, old) = match has_errors {
930-
true => (SubgraphHealth::Unhealthy, SubgraphHealth::Healthy),
942+
true => {
943+
debug!(
944+
logger,
945+
"Subgraph has deterministic errors. Marking as unhealthy";
946+
"subgraph" => id.to_string(),
947+
"block" => block
948+
);
949+
(SubgraphHealth::Unhealthy, SubgraphHealth::Healthy)
950+
}
931951
false => (SubgraphHealth::Healthy, SubgraphHealth::Unhealthy),
932952
};
933953

@@ -979,6 +999,7 @@ pub(crate) fn entities_with_causality_region(
979999

9801000
/// Reverts the errors and updates the subgraph health if necessary.
9811001
pub(crate) fn revert_subgraph_errors(
1002+
logger: &Logger,
9821003
conn: &mut PgConnection,
9831004
id: &DeploymentHash,
9841005
reverted_block: BlockNumber,
@@ -997,7 +1018,7 @@ pub(crate) fn revert_subgraph_errors(
9971018
// The result will be the same at `reverted_block` or `reverted_block - 1` since the errors at
9981019
// `reverted_block` were just deleted, but semantically we care about `reverted_block - 1` which
9991020
// is the block being reverted to.
1000-
check_health(conn, id, reverted_block - 1)?;
1021+
check_health(&logger, conn, id, reverted_block - 1)?;
10011022

10021023
// If the deployment is failed in both `failed` and `status` columns,
10031024
// update both values respectively to `false` and `healthy`. Basically

store/postgres/src/deployment_store.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1138,13 +1138,20 @@ impl DeploymentStore {
11381138

11391139
if !batch.deterministic_errors.is_empty() {
11401140
deployment::insert_subgraph_errors(
1141+
&self.logger,
11411142
conn,
11421143
&site.deployment,
11431144
&batch.deterministic_errors,
11441145
batch.block_ptr.number,
11451146
)?;
11461147

11471148
if batch.is_non_fatal_errors_active {
1149+
debug!(
1150+
logger,
1151+
"Updating non-fatal errors for subgraph";
1152+
"subgraph" => site.deployment.to_string(),
1153+
"block" => batch.block_ptr.number,
1154+
);
11481155
deployment::update_non_fatal_errors(
11491156
conn,
11501157
&site.deployment,
@@ -1273,6 +1280,7 @@ impl DeploymentStore {
12731280
firehose_cursor: &FirehoseCursor,
12741281
truncate: bool,
12751282
) -> Result<StoreEvent, StoreError> {
1283+
let logger = self.logger.cheap_clone();
12761284
let event = deployment::with_lock(conn, &site, |conn| {
12771285
conn.transaction(|conn| -> Result<_, StoreError> {
12781286
// The revert functions want the number of the first block that we need to get rid of
@@ -1303,7 +1311,7 @@ impl DeploymentStore {
13031311
// importantly creation of dynamic data sources. We ensure in the
13041312
// rest of the code that we only record history for those meta data
13051313
// changes that might need to be reverted
1306-
Layout::revert_metadata(conn, &site, block)?;
1314+
Layout::revert_metadata(&logger, conn, &site, block)?;
13071315

13081316
Ok(event)
13091317
})

store/postgres/src/relational.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -930,12 +930,13 @@ impl Layout {
930930
/// For metadata, reversion always means deletion since the metadata that
931931
/// is subject to reversion is only ever created but never updated
932932
pub fn revert_metadata(
933+
logger: &Logger,
933934
conn: &mut PgConnection,
934935
site: &Site,
935936
block: BlockNumber,
936937
) -> Result<(), StoreError> {
937938
crate::dynds::revert(conn, site, block)?;
938-
crate::deployment::revert_subgraph_errors(conn, &site.deployment, block)?;
939+
crate::deployment::revert_subgraph_errors(logger, conn, &site.deployment, block)?;
939940

940941
Ok(())
941942
}

0 commit comments

Comments
 (0)