Closed
Description
Describe the bug
👋 I'm getting the following error:
Schema contains duplicate unqualified field name "id:1"
This occurs when using the Substrait consumer, during the conversion of Substrait relations to a DataFusion logical plan — specifically when a query includes more than two JOINs.
I’ve opened a draft PR that addresses this issue. It includes some notes on my understanding of the problem, which seems to stem from how columns are aliased and renamed during the Substrait JOIN → DataFusion JOIN conversion, particularly in the `requalify_sides_if_needed` function.
The PR includes a Substrait reproducer and a test; I’m also including the reproducer plan below for reference.
To Reproduce
{
"extensionUris": [{
"extensionUriAnchor": 1,
"uri": "/functions_aggregate_generic.yaml"
}, {
"extensionUriAnchor": 2,
"uri": "/functions_comparison.yaml"
}],
"extensions": [{
"extensionFunction": {
"extensionUriReference": 1,
"functionAnchor": 0,
"name": "count:"
}
}, {
"extensionFunction": {
"extensionUriReference": 2,
"functionAnchor": 1,
"name": "equal:any_any"
}
}],
"relations": [{
"root": {
"input": {
"project": {
"common": {
"emit": {
"outputMapping": [8, 9, 10, 11]
}
},
"input": {
"join": {
"common": {
"direct": {
}
},
"left": {
"join": {
"common": {
"direct": {
}
},
"left": {
"join": {
"common": {
"direct": {
}
},
"left": {
"aggregate": {
"common": {
"direct": {
}
},
"input": {
"read": {
"common": {
"direct": {
}
},
"baseSchema": {
"names": ["id"],
"struct": {
"types": [{
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}],
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"virtualTable": {
"values": [{
"fields": [{
"i64": "1",
"nullable": true,
"typeVariationReference": 0
}]
}, {
"fields": [{
"i64": "2",
"nullable": true,
"typeVariationReference": 0
}]
}]
}
}
},
"groupings": [{
"groupingExpressions": [{
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}],
"expressionReferences": []
}],
"measures": [{
"measure": {
"functionReference": 0,
"args": [],
"sorts": [],
"phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT",
"outputType": {
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"invocation": "AGGREGATION_INVOCATION_ALL",
"arguments": [],
"options": []
}
}],
"groupingExpressions": []
}
},
"right": {
"aggregate": {
"common": {
"direct": {
}
},
"input": {
"read": {
"common": {
"direct": {
}
},
"baseSchema": {
"names": ["id", "category"],
"struct": {
"types": [{
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}, {
"string": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}],
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"virtualTable": {
"values": [{
"fields": [{
"i64": "1",
"nullable": true,
"typeVariationReference": 0
}, {
"string": "info",
"nullable": true,
"typeVariationReference": 0
}]
}, {
"fields": [{
"i64": "2",
"nullable": true,
"typeVariationReference": 0
}, {
"string": "low",
"nullable": true,
"typeVariationReference": 0
}]
}]
}
}
},
"groupings": [{
"groupingExpressions": [{
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}, {
"selection": {
"directReference": {
"structField": {
"field": 1
}
},
"rootReference": {
}
}
}],
"expressionReferences": []
}],
"measures": [],
"groupingExpressions": []
}
},
"expression": {
"scalarFunction": {
"functionReference": 1,
"args": [],
"outputType": {
"bool": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
},
"arguments": [{
"value": {
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}
}, {
"value": {
"selection": {
"directReference": {
"structField": {
"field": 2
}
},
"rootReference": {
}
}
}
}],
"options": []
}
},
"type": "JOIN_TYPE_LEFT"
}
},
"right": {
"aggregate": {
"common": {
"direct": {
}
},
"input": {
"read": {
"common": {
"direct": {
}
},
"baseSchema": {
"names": ["id"],
"struct": {
"types": [{
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}],
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"virtualTable": {
"values": [{
"fields": [{
"i64": "1",
"nullable": true,
"typeVariationReference": 0
}]
}, {
"fields": [{
"i64": "2",
"nullable": true,
"typeVariationReference": 0
}]
}]
}
}
},
"groupings": [{
"groupingExpressions": [{
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}],
"expressionReferences": []
}],
"measures": [{
"measure": {
"functionReference": 0,
"args": [],
"sorts": [],
"phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT",
"outputType": {
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"invocation": "AGGREGATION_INVOCATION_ALL",
"arguments": [],
"options": []
}
}],
"groupingExpressions": []
}
},
"expression": {
"scalarFunction": {
"functionReference": 1,
"args": [],
"outputType": {
"bool": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
},
"arguments": [{
"value": {
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}
}, {
"value": {
"selection": {
"directReference": {
"structField": {
"field": 4
}
},
"rootReference": {
}
}
}
}],
"options": []
}
},
"type": "JOIN_TYPE_LEFT"
}
},
"right": {
"aggregate": {
"common": {
"direct": {
}
},
"input": {
"read": {
"common": {
"direct": {
}
},
"baseSchema": {
"names": ["id"],
"struct": {
"types": [{
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
}],
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"virtualTable": {
"values": [{
"fields": [{
"i64": "1",
"nullable": true,
"typeVariationReference": 0
}]
}, {
"fields": [{
"i64": "2",
"nullable": true,
"typeVariationReference": 0
}]
}]
}
}
},
"groupings": [{
"groupingExpressions": [{
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}],
"expressionReferences": []
}],
"measures": [{
"measure": {
"functionReference": 0,
"args": [],
"sorts": [],
"phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT",
"outputType": {
"i64": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_REQUIRED"
}
},
"invocation": "AGGREGATION_INVOCATION_ALL",
"arguments": [],
"options": []
}
}],
"groupingExpressions": []
}
},
"expression": {
"scalarFunction": {
"functionReference": 1,
"args": [],
"outputType": {
"bool": {
"typeVariationReference": 0,
"nullability": "NULLABILITY_NULLABLE"
}
},
"arguments": [{
"value": {
"selection": {
"directReference": {
"structField": {
"field": 0
}
},
"rootReference": {
}
}
}
}, {
"value": {
"selection": {
"directReference": {
"structField": {
"field": 6
}
},
"rootReference": {
}
}
}
}],
"options": []
}
},
"type": "JOIN_TYPE_LEFT"
}
},
"expressions": [{
"selection": {
"directReference": {
"structField": {
"field": 1
}
},
"rootReference": {
}
}
}, {
"selection": {
"directReference": {
"structField": {
"field": 3
}
},
"rootReference": {
}
}
}, {
"selection": {
"directReference": {
"structField": {
"field": 5
}
},
"rootReference": {
}
}
}, {
"selection": {
"directReference": {
"structField": {
"field": 7
}
},
"rootReference": {
}
}
}]
}
},
"names": ["count_first", "category", "count_second", "count_third"]
}
}],
"expectedTypeUrls": [],
"version": {
"majorNumber": 0,
"minorNumber": 52,
"patchNumber": 0,
"gitHash": ""
}
}
Expected behavior
No response
Additional context
No response