Skip to content

Commit 5146663

Browse files
authored
Move JSON Test Format To integration-testing (#2724)
* Move JSON Test Format To integration-testing * Fix RAT
1 parent 4f52a25 commit 5146663

File tree

9 files changed

+1716
-1642
lines changed

9 files changed

+1716
-1642
lines changed

arrow/src/datatypes/datatype.rs

Lines changed: 0 additions & 344 deletions
Large diffs are not rendered by default.

arrow/src/datatypes/field.rs

Lines changed: 0 additions & 277 deletions
Original file line numberDiff line numberDiff line change
@@ -250,283 +250,6 @@ impl Field {
250250
}
251251
}
252252

253-
/// Parse a `Field` definition from a JSON representation.
///
/// `json` must be a JSON object with the attributes `name` (string),
/// `nullable` (bool) and `type` (parsed by `DataType::from`). The optional
/// `metadata` attribute may be either an array of `{"key": .., "value": ..}`
/// objects or a plain string-to-string object. List, struct, map and union
/// types additionally require a `children` array, and an optional
/// `dictionary` object (`indexType`, `id`, `isOrdered`) wraps the parsed type
/// in `DataType::Dictionary`.
///
/// # Errors
///
/// Returns `ArrowError::ParseError` when `json` is not an object, when a
/// required attribute is missing or has the wrong JSON type, or when the
/// dictionary `id` is a number that cannot be represented as an `i64`.
#[cfg(feature = "json")]
pub fn from(json: &serde_json::Value) -> Result<Self> {
    use serde_json::Value;

    // Builds a parse error from a fixed message.
    fn parse_error(message: &str) -> ArrowError {
        ArrowError::ParseError(message.to_string())
    }

    // Fetches the `children` attribute, insisting that it is a JSON array.
    fn children_array(
        map: &serde_json::Map<String, serde_json::Value>,
    ) -> Result<&Vec<serde_json::Value>> {
        match map.get("children") {
            Some(serde_json::Value::Array(values)) => Ok(values),
            Some(_) => Err(parse_error("Field 'children' must be an array")),
            None => Err(parse_error("Field missing 'children' attribute")),
        }
    }

    let map = match json {
        Value::Object(map) => map,
        _ => return Err(parse_error("Invalid json value type for field")),
    };

    let name = match map.get("name") {
        Some(Value::String(name)) => name.to_string(),
        _ => return Err(parse_error("Field missing 'name' attribute")),
    };
    let nullable = match map.get("nullable") {
        Some(Value::Bool(b)) => *b,
        _ => return Err(parse_error("Field missing 'nullable' attribute")),
    };
    let data_type = match map.get("type") {
        Some(t) => DataType::from(t)?,
        None => return Err(parse_error("Field missing 'type' attribute")),
    };

    // Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
    let metadata = match map.get("metadata") {
        Some(Value::Array(values)) => {
            let mut res: BTreeMap<String, String> = BTreeMap::new();
            for value in values {
                let entry = value.as_object().ok_or_else(|| {
                    parse_error("Field 'metadata' contains non-object key-value pair")
                })?;
                if entry.len() != 2 {
                    return Err(parse_error(
                        "Field 'metadata' must have exact two entries for each key-value map",
                    ));
                }
                match (entry.get("key"), entry.get("value")) {
                    (Some(k), Some(v)) => match (k.as_str(), v.as_str()) {
                        (Some(k_str), Some(v_str)) => {
                            res.insert(k_str.to_string(), v_str.to_string());
                        }
                        _ => {
                            return Err(parse_error(
                                "Field 'metadata' must have map value of string type",
                            ));
                        }
                    },
                    _ => {
                        return Err(parse_error(
                            "Field 'metadata' lacks map keys named \"key\" or \"value\"",
                        ));
                    }
                }
            }
            Some(res)
        }
        // We also support map format, because Schema's metadata supports this.
        // See https://github.com/apache/arrow/pull/5907
        Some(Value::Object(values)) => {
            let mut res: BTreeMap<String, String> = BTreeMap::new();
            for (k, v) in values {
                match v.as_str() {
                    Some(str_value) => {
                        res.insert(k.clone(), str_value.to_string());
                    }
                    None => {
                        return Err(ArrowError::ParseError(format!(
                            "Field 'metadata' contains non-string value for key {}",
                            k
                        )));
                    }
                }
            }
            Some(res)
        }
        // Message kept verbatim for compatibility, although objects are accepted too.
        Some(_) => return Err(parse_error("Field `metadata` is not json array")),
        None => None,
    };

    // List-like types carry exactly one child field describing their items.
    let single_list_child = || -> Result<Box<Field>> {
        let values = children_array(map)?;
        if values.len() != 1 {
            return Err(parse_error(
                "Field 'children' must have one element for a list data type",
            ));
        }
        Ok(Box::new(Field::from(&values[0])?))
    };

    // if data_type is a struct or list, get its children
    let data_type = match data_type {
        DataType::List(_) => DataType::List(single_list_child()?),
        DataType::LargeList(_) => DataType::LargeList(single_list_child()?),
        DataType::FixedSizeList(_, size) => {
            DataType::FixedSizeList(single_list_child()?, size)
        }
        DataType::Struct(mut fields) => {
            let mut struct_fields = children_array(map)?
                .iter()
                .map(Field::from)
                .collect::<Result<Vec<Field>>>()?;
            fields.append(&mut struct_fields);
            DataType::Struct(fields)
        }
        DataType::Map(_, keys_sorted) => match map.get("children") {
            Some(Value::Array(values)) if values.len() == 1 => {
                let child = Self::from(&values[0])?;
                // child must be a struct
                match child.data_type() {
                    DataType::Struct(map_fields) if map_fields.len() == 2 => {
                        DataType::Map(Box::new(child), keys_sorted)
                    }
                    t => {
                        return Err(ArrowError::ParseError(format!(
                            "Map children should be a struct with 2 fields, found {:?}",
                            t
                        )));
                    }
                }
            }
            Some(_) => {
                return Err(parse_error(
                    "Field 'children' must be an array with 1 element",
                ));
            }
            None => return Err(parse_error("Field missing 'children' attribute")),
        },
        DataType::Union(_, type_ids, mode) => {
            let union_fields = children_array(map)?
                .iter()
                .map(Field::from)
                .collect::<Result<Vec<Field>>>()?;
            DataType::Union(union_fields, type_ids, mode)
        }
        other => other,
    };

    // A `dictionary` attribute turns the parsed type into the dictionary's
    // value type; without it the defaults are id 0 and unordered.
    let (data_type, dict_id, dict_is_ordered) = match map.get("dictionary") {
        Some(dictionary) => {
            let index_type = match dictionary.get("indexType") {
                Some(t) => DataType::from(t)?,
                None => return Err(parse_error("Field missing 'indexType' attribute")),
            };
            let id = match dictionary.get("id") {
                // `as_i64` yields `None` for floats and for integers outside
                // the i64 range; report that instead of panicking.
                Some(Value::Number(n)) => n.as_i64().ok_or_else(|| {
                    parse_error("Field 'id' attribute must be an integer that fits in i64")
                })?,
                _ => return Err(parse_error("Field missing 'id' attribute")),
            };
            let is_ordered = match dictionary.get("isOrdered") {
                Some(Value::Bool(b)) => *b,
                _ => return Err(parse_error("Field missing 'isOrdered' attribute")),
            };
            (
                DataType::Dictionary(Box::new(index_type), Box::new(data_type)),
                id,
                is_ordered,
            )
        }
        None => (data_type, 0, false),
    };

    Ok(Field {
        name,
        data_type,
        nullable,
        dict_id,
        dict_is_ordered,
        metadata,
    })
}
497-
498-
/// Generate a JSON representation of the `Field`.
///
/// The result is an object with `name`, `nullable`, `type` and `children`.
/// For dictionary-typed fields `type` describes the dictionary's value type
/// and a `dictionary` object (`id`, `indexType`, `isOrdered`) is added.
#[cfg(feature = "json")]
pub fn to_json(&self) -> serde_json::Value {
    // Only struct, list-like and map types contribute nested child fields.
    let nested: Vec<serde_json::Value> = match self.data_type() {
        DataType::Struct(members) => members.iter().map(Field::to_json).collect(),
        DataType::List(item)
        | DataType::LargeList(item)
        | DataType::FixedSizeList(item, _)
        | DataType::Map(item, _) => vec![item.to_json()],
        _ => Vec::new(),
    };

    if let DataType::Dictionary(key_type, inner_type) = self.data_type() {
        return serde_json::json!({
            "name": self.name,
            "nullable": self.nullable,
            "type": inner_type.to_json(),
            "children": nested,
            "dictionary": {
                "id": self.dict_id,
                "indexType": key_type.to_json(),
                "isOrdered": self.dict_is_ordered
            }
        });
    }

    serde_json::json!({
        "name": self.name,
        "nullable": self.nullable,
        "type": self.data_type.to_json(),
        "children": nested
    })
}
529-
530253
/// Merge this field into self if it is compatible.
531254
///
532255
/// Struct fields are merged recursively.

0 commit comments

Comments
 (0)