RSDK-7565, RSDK-7471: add new methods get_properties and capture_all_from_camera (#617)

bhaney · njooma · commit 554e82846a44 · 2024-05-20T15:25:12.000-04:00
diff --git a/src/viam/services/vision/__init__.py b/src/viam/services/vision/__init__.py
@@ -2,9 +2,10 @@
 from viam.services.vision.service import VisionRPCService
 
 from .client import Classification, Detection, VisionClient
-from .vision import Vision
+from .vision import Vision, CaptureAllResult
 
 __all__ = [
+    "CaptureAllResult",
     "Classification",
     "Detection",
     "VisionClient",
diff --git a/src/viam/services/vision/client.py b/src/viam/services/vision/client.py
@@ -6,6 +6,8 @@
 from viam.media.video import CameraMimeType, ViamImage
 from viam.proto.common import DoCommandRequest, DoCommandResponse, PointCloudObject
 from viam.proto.service.vision import (
+    CaptureAllFromCameraRequest,
+    CaptureAllFromCameraResponse,
     Classification,
     Detection,
     GetClassificationsFromCameraRequest,
@@ -18,12 +20,14 @@
     GetDetectionsResponse,
     GetObjectPointCloudsRequest,
     GetObjectPointCloudsResponse,
+    GetPropertiesRequest,
+    GetPropertiesResponse,
     VisionServiceStub,
 )
 from viam.resource.rpc_client_base import ReconfigurableResourceRPCClientBase
 from viam.utils import ValueTypes, dict_to_struct, struct_to_dict
 
-from .vision import Vision
+from .vision import Vision, CaptureAllResult
 
 
 class VisionClient(Vision, ReconfigurableResourceRPCClientBase):
@@ -39,6 +43,43 @@ def __init__(self, name: str, channel: Channel):
         self.channel = channel
         self.client = VisionServiceStub(channel)
 
+    async def capture_all_from_camera(
+        self,
+        camera_name: str,
+        return_image: bool = False,
+        return_classifications: bool = False,
+        return_detections: bool = False,
+        return_object_point_clouds: bool = False,
+        *,
+        extra: Optional[Mapping[str, Any]] = None,
+        timeout: Optional[float] = None,
+    ) -> CaptureAllResult:
+        """Fetch the requested image, classifications, detections and objects in one RPC.
+
+        Fields whose ``return_*`` flag is False are None on the returned result.
+        """
+        request = CaptureAllFromCameraRequest(
+            name=self.name,
+            camera_name=camera_name,
+            return_image=return_image,
+            return_classifications=return_classifications,
+            return_detections=return_detections,
+            return_object_point_clouds=return_object_point_clouds,
+            extra=dict_to_struct({} if extra is None else extra),
+        )
+        response: CaptureAllFromCameraResponse = await self.client.CaptureAllFromCamera(request, timeout=timeout)
+        metadata = struct_to_dict(response.extra)
+        image = None
+        if return_image:
+            image = ViamImage(response.image.image, CameraMimeType.from_proto(response.image.format))
+        return CaptureAllResult(
+            image=image,
+            classifications=list(response.classifications) if return_classifications else None,
+            detections=list(response.detections) if return_detections else None,
+            objects=list(response.objects) if return_object_point_clouds else None,
+            extra=metadata,
+        )
+
     async def get_detections_from_camera(
         self,
         camera_name: str,
@@ -135,6 +176,21 @@ async def get_object_point_clouds(
         response: GetObjectPointCloudsResponse = await self.client.GetObjectPointClouds(request, timeout=timeout)
         return list(response.objects)
 
+    async def get_properties(
+        self,
+        *,
+        extra: Optional[Mapping[str, Any]] = None,
+        timeout: Optional[float] = None,
+    ) -> Vision.Properties:
+        """Ask the remote vision service for its properties and return the reply as-is."""
+        request = GetPropertiesRequest(
+            name=self.name,
+            extra=dict_to_struct({} if extra is None else extra),
+        )
+        # Vision.Properties is an alias of GetPropertiesResponse, so no conversion is needed.
+        properties: GetPropertiesResponse = await self.client.GetProperties(request, timeout=timeout)
+        return properties
+
     async def do_command(
         self,
         command: Mapping[str, ValueTypes],
diff --git a/src/viam/services/vision/service.py b/src/viam/services/vision/service.py
@@ -2,7 +2,10 @@
 
 from viam.media.video import CameraMimeType, ViamImage
 from viam.proto.common import DoCommandRequest, DoCommandResponse
+from viam.proto.component.camera import Image
 from viam.proto.service.vision import (
+    CaptureAllFromCameraRequest,
+    CaptureAllFromCameraResponse,
     GetClassificationsFromCameraRequest,
     GetClassificationsFromCameraResponse,
     GetClassificationsRequest,
@@ -13,6 +16,8 @@
     GetDetectionsResponse,
     GetObjectPointCloudsRequest,
     GetObjectPointCloudsResponse,
+    GetPropertiesRequest,
+    GetPropertiesResponse,
     UnimplementedVisionServiceBase,
 )
 from viam.resource.rpc_service_base import ResourceRPCServiceBase
@@ -28,6 +33,35 @@ class VisionRPCService(UnimplementedVisionServiceBase, ResourceRPCServiceBase):
 
     RESOURCE_TYPE = Vision
 
+    async def CaptureAllFromCamera(self, stream: Stream[CaptureAllFromCameraRequest, CaptureAllFromCameraResponse]) -> None:
+        """Serve CaptureAllFromCamera by delegating to the named vision resource.
+
+        The result's image, when present, is wrapped in a camera ``Image`` proto; otherwise ``image`` is passed as None.
+        """
+        request = await stream.recv_message()
+        assert request is not None
+        service = self.get_resource(request.name)
+        captured = await service.capture_all_from_camera(
+            request.camera_name,
+            return_image=request.return_image,
+            return_classifications=request.return_classifications,
+            return_detections=request.return_detections,
+            return_object_point_clouds=request.return_object_point_clouds,
+            extra=struct_to_dict(request.extra),
+            timeout=stream.deadline.time_remaining() if stream.deadline else None,
+        )
+        proto_image = None
+        if captured.image is not None:
+            proto_image = Image(source_name=request.camera_name, format=captured.image.mime_type.to_proto(), image=captured.image.data)
+        reply = CaptureAllFromCameraResponse(
+            image=proto_image,
+            detections=captured.detections,
+            classifications=captured.classifications,
+            objects=captured.objects,
+            extra=dict_to_struct(captured.extra),
+        )
+        await stream.send_message(reply)
+
     async def GetDetectionsFromCamera(self, stream: Stream[GetDetectionsFromCameraRequest, GetDetectionsFromCameraResponse]) -> None:
         request = await stream.recv_message()
         assert request is not None
@@ -88,6 +122,21 @@ async def GetObjectPointClouds(self, stream: Stream[GetObjectPointCloudsRequest,
         response = GetObjectPointCloudsResponse(mime_type=CameraMimeType.PCD.value, objects=result)
         await stream.send_message(response)
 
+    async def GetProperties(self, stream: Stream[GetPropertiesRequest, GetPropertiesResponse]) -> None:
+        """Serve GetProperties by asking the named vision resource for its properties."""
+        request = await stream.recv_message()
+        assert request is not None
+        service = self.get_resource(request.name)
+        options = struct_to_dict(request.extra)
+        props = await service.get_properties(extra=options, timeout=stream.deadline.time_remaining() if stream.deadline else None)
+        # Copy the three capability flags field by field into the response message.
+        reply = GetPropertiesResponse(
+            classifications_supported=props.classifications_supported,
+            detections_supported=props.detections_supported,
+            object_point_clouds_supported=props.object_point_clouds_supported,
+        )
+        await stream.send_message(reply)
+
     async def DoCommand(self, stream: Stream[DoCommandRequest, DoCommandResponse]) -> None:
         request = await stream.recv_message()
         assert request is not None
diff --git a/src/viam/services/vision/vision.py b/src/viam/services/vision/vision.py
@@ -1,27 +1,112 @@
 import abc
-from typing import Any, Final, List, Mapping, Optional
+import sys
+from typing import Any, Final, List, Mapping, Optional, Union
 
 from viam.media.video import ViamImage
 from viam.proto.common import PointCloudObject
-from viam.proto.service.vision import Classification, Detection
+from viam.proto.service.vision import Classification, Detection, GetPropertiesResponse
 from viam.resource.types import RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_SERVICE, Subtype
 
+if sys.version_info >= (3, 10):
+    from typing import TypeAlias
+else:
+    from typing_extensions import TypeAlias
+
 from ..service_base import ServiceBase
 
 
+class CaptureAllResult:
+    """
+    CaptureAllResult represents the collection of things that you have requested from the
+    CaptureAllFromCamera method. This is used most often for visualization purposes, since normally,
+    returning the image on every call to a classifier/detector/etc would be costly and unnecessary.
+    The default result for each field is None rather than the empty list to distinguish between
+    "there was no request for the classifier/detector to return a result" vs.
+    "the classifier/detector was requested, but there were no results".
+    """
+    def __init__(self, image=None, classifications=None, detections=None, objects=None, extra=None):
+        """
+        Args:
+            image (ViamImage|None): The image from the GetImage request of the camera, if it was requested.
+            classifications (List[Classification]|None): The classifications from GetClassifications, if it was requested.
+            detections (List[Detection]|None): The detections from GetDetections, if it was requested.
+            objects (List[PointCloudObject]|None): the object point clouds from GetObjectPointClouds, if it was requested.
+            extra (dict|None): A catch all structure, usually for metadata, that a module writer might want to return.
+                When omitted or None, the instance gets its own new empty dict.
+        """
+        self.image: Union[ViamImage, None] = image
+        self.detections: Union[List[Detection], None] = detections
+        self.classifications: Union[List[Classification], None] = classifications
+        self.objects: Union[List[PointCloudObject], None] = objects
+        # A literal ``{}`` default is created once at definition time and would be shared by
+        # every instance, so the empty dict is built per call instead.
+        self.extra: dict = {} if extra is None else extra
+
+
 class Vision(ServiceBase):
     """
     Vision represents a Vision service.
 
     This acts as an abstract base class for any drivers representing specific
-    arm implementations. This cannot be used on its own. If the ``__init__()`` function is
+    vision implementations. This cannot be used on its own. If the ``__init__()`` function is
     overridden, it must call the ``super().__init__()`` function.
     """
-
     SUBTYPE: Final = Subtype(  # pyright: ignore [reportIncompatibleVariableOverride]
         RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_SERVICE, "vision"
     )
 
+    Properties: "TypeAlias" = GetPropertiesResponse
+    """
+    Properties is a class that states what features are supported on the associated vision service. Currently, these are:
+
+    - classifications_supported (bool): GetClassifications and GetClassificationsFromCamera are implemented.
+    - detections_supported (bool): GetDetections and GetDetectionsFromCamera are implemented.
+    - object_point_clouds_supported (bool): GetObjectPointClouds is implemented.
+    """
+
+    @abc.abstractmethod
+    async def capture_all_from_camera(
+        self,
+        camera_name: str,
+        return_image: bool = False,
+        return_classifications: bool = False,
+        return_detections: bool = False,
+        return_object_point_clouds: bool = False,
+        *,
+        extra: Optional[Mapping[str, Any]] = None,
+        timeout: Optional[float] = None,
+    ) -> CaptureAllResult:
+        """Get the next image, detections, classifications, and objects all together, given a camera name. Used for visualization.
+
+        ::
+
+            camera_name = "cam1"
+            # Grab the detector you configured on your machine
+            my_detector = VisionClient.from_robot(robot, "my_detector")
+
+            # capture all from the next image from the camera
+            result = await my_detector.capture_all_from_camera(
+                camera_name,
+                return_image=True,
+                return_detections=True,
+            )
+
+        Args:
+            camera_name (str): The name of the camera to use for detection
+            return_image (bool): Ask the vision service to return the camera's latest image
+            return_classifications (bool): Ask the vision service to return its latest classifications
+            return_detections (bool): Ask the vision service to return its latest detections
+            return_object_point_clouds (bool): Ask the vision service to return its latest 3D segmentations
+            extra (Mapping[str, Any]|None): Optional extra options; the RPC client sends these in the request's ``extra`` field
+            timeout (float|None): Optional timeout; the RPC client passes it to the underlying RPC call
+
+        Returns:
+            vision.CaptureAllResult: A class that stores all potential returns from the vision service.
+            It can return the image from the camera along with its associated detections, classifications,
+            and objects, as well as any extra info the model may provide.
+        """
+        ...
+
     @abc.abstractmethod
     async def get_detections_from_camera(
         self,
@@ -195,3 +280,26 @@ async def get_object_point_clouds(
             List[viam.proto.common.PointCloudObject]: The pointcloud objects with metadata
         """
         ...
+
+    @abc.abstractmethod
+    async def get_properties(
+        self,
+        *,
+        extra: Optional[Mapping[str, Any]] = None,
+        timeout: Optional[float] = None,
+    ) -> Properties:
+        """Get info about what vision methods the vision service provides. Currently returns boolean values that
+        state whether the service implements the classification, detection, and/or 3D object segmentation methods.
+
+        ::
+
+            # Grab the detector you configured on your machine
+            my_detector = VisionClient.from_robot(robot, "my_detector")
+            properties = await my_detector.get_properties()
+            properties.detections_supported      # returns True
+            properties.classifications_supported # returns False
+
+        Returns:
+            Properties: The properties of the vision service
+        """
+        ...
diff --git a/tests/mocks/services.py b/tests/mocks/services.py
@@ -330,7 +330,7 @@
 from viam.services.mlmodel.utils import flat_tensors_to_ndarrays, ndarrays_to_flat_tensors
 from viam.services.navigation import Navigation
 from viam.services.slam import SLAM
-from viam.services.vision import Vision
+from viam.services.vision import Vision, CaptureAllResult
 from viam.utils import ValueTypes, datetime_to_timestamp, dict_to_struct, struct_to_dict
 
 
@@ -344,17 +344,52 @@ def __init__(
         classifications: List[Classification],
         segmenters: List[str],
         point_clouds: List[PointCloudObject],
+        image: ViamImage,
+        properties: Vision.Properties,
     ):
         self.detectors = detectors
         self.detections = detections
         self.classifiers = classifiers
         self.classifications = classifications
         self.segmenters = segmenters
         self.point_clouds = point_clouds
+        self.image = image
+        self.properties = properties
         self.extra: Optional[Mapping[str, Any]] = None
         self.timeout: Optional[float] = None
         super().__init__(name)
 
+    async def get_properties(
+        self, *, extra: Optional[Mapping[str, Any]] = None, timeout: Optional[float] = None,
+    ) -> Vision.Properties:
+        """Record ``extra`` and ``timeout`` on the mock and return the properties given at construction."""
+        self.extra, self.timeout = extra, timeout
+        return self.properties
+
+    async def capture_all_from_camera(
+        self,
+        camera_name: str,
+        return_image: bool = False,
+        return_classifications: bool = False,
+        return_detections: bool = False,
+        return_object_point_clouds: bool = False,
+        *,
+        extra: Optional[Mapping[str, Any]] = None,
+        timeout: Optional[float] = None,
+    ) -> CaptureAllResult:
+        """
+        Record ``extra`` and ``timeout`` on the mock, then return a result holding only the
+        stored values whose ``return_*`` flag is set; every other field is None.
+        """
+        self.extra = extra
+        self.timeout = timeout
+        return CaptureAllResult(
+            image=self.image if return_image else None,
+            classifications=self.classifications if return_classifications else None,
+            detections=self.detections if return_detections else None,
+            objects=self.point_clouds if return_object_point_clouds else None,
+        )
+
     async def get_detections_from_camera(
         self, camera_name: str, *, extra: Optional[Mapping[str, Any]] = None, timeout: Optional[float] = None
     ) -> List[Detection]:
diff --git a/tests/test_vision_service.py b/tests/test_vision_service.py