Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Updating vision reader to also produce class probs and labels (#293)
Browse files Browse the repository at this point in the history
* updating vision reader to also produce class probs and labels

* removing duplicate code

* removing unnecessary import

* oops, fix
  • Loading branch information
AkshitaB committed Jul 30, 2021
1 parent 7b7b9c1 commit 4eb7c27
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 48 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Fixed tests for Spacy versions greater than 3.1

### Changed

- Updated `VisionReader` to also yield `class_probs` and `class_labels` when processing images, so that all of `RegionDetectorOutput`'s fields are produced.

## [v2.6.0](https://github.com/allenai/allennlp-models/releases/tag/v2.6.0) - 2021-07-19

### Added
Expand Down
6 changes: 4 additions & 2 deletions allennlp_models/vision/dataset_readers/flickr30k.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ def _read(self, file_path: str):
full_file_path = os.path.join(self.data_dir, filename)
caption_dicts.append(get_caption_data(full_file_path))

processed_images: Iterable[Optional[Tuple[Tensor, Tensor]]]
processed_images: Iterable[
Optional[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]
]
filenames = [f"{caption_dict['image_id']}.jpg" for caption_dict in caption_dicts]
try:
processed_images = self._process_image_paths(
Expand All @@ -221,7 +223,7 @@ def _read(self, file_path: str):
averaged_features_list = []
coordinates_list = []
masks_list = []
for features, coords in processed_images:
for features, coords, _, _ in processed_images:
features_list.append(TensorField(features))
averaged_features_list.append(torch.mean(features, dim=0))
coordinates_list.append(TensorField(coords))
Expand Down
12 changes: 8 additions & 4 deletions allennlp_models/vision/dataset_readers/gqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,9 @@ def _read(self, split_or_filename: str):
)
)

processed_images: Iterable[Optional[Tuple[Tensor, Tensor]]]
processed_images: Iterable[
Optional[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]
]
if self.produce_featurized_images:
# It would be much easier to just process one image at a time, but it's faster to process
# them in batches. So this code gathers up instances until it has enough to fill up a batch
Expand Down Expand Up @@ -170,7 +172,7 @@ def _read(self, split_or_filename: str):
def text_to_instance(
self, # type: ignore
question: str,
image: Optional[Union[str, Tuple[Tensor, Tensor]]],
image: Optional[Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]],
answer: Optional[Dict[str, str]] = None,
*,
use_cache: bool = True,
Expand All @@ -195,9 +197,11 @@ def text_to_instance(

if image is not None:
if isinstance(image, str):
features, coords = next(self._process_image_paths([image], use_cache=use_cache))
features, coords, _, _ = next(
self._process_image_paths([image], use_cache=use_cache)
)
else:
features, coords = image
features, coords, _, _ = image
fields["box_features"] = ArrayField(features)
fields["box_coordinates"] = ArrayField(coords)
fields["box_mask"] = ArrayField(
Expand Down
22 changes: 15 additions & 7 deletions allennlp_models/vision/dataset_readers/nlvr2.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,12 @@ def _read(self, split_or_filename: str):
blobs.append(json_blob)

blob_dicts = list(self.shard_iterable(blobs))
processed_images1: Iterable[Optional[Tuple[Tensor, Tensor]]]
processed_images2: Iterable[Optional[Tuple[Tensor, Tensor]]]
processed_images1: Iterable[
Optional[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]
]
processed_images2: Iterable[
Optional[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]
]
if self.produce_featurized_images:
# It would be much easier to just process one image at a time, but it's faster to process
# them in batches. So this code gathers up instances until it has enough to fill up a batch
Expand Down Expand Up @@ -169,11 +173,15 @@ def _read(self, split_or_filename: str):
yield instance
logger.info(f"Successfully yielded {attempted_instances} instances")

def extract_image_features(self, image: Union[str, Tuple[Tensor, Tensor]], use_cache: bool):
def extract_image_features(
self,
image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
use_cache: bool,
):
if isinstance(image, str):
features, coords = next(self._process_image_paths([image], use_cache=use_cache))
features, coords, _, _ = next(self._process_image_paths([image], use_cache=use_cache))
else:
features, coords = image
features, coords, _, _ = image

return (
ArrayField(features),
Expand All @@ -190,8 +198,8 @@ def text_to_instance(
self, # type: ignore
identifier: Optional[str],
hypothesis: str,
image1: Union[str, Tuple[Tensor, Tensor]],
image2: Union[str, Tuple[Tensor, Tensor]],
image1: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
image2: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
label: bool,
use_cache: bool = True,
) -> Instance:
Expand Down
10 changes: 6 additions & 4 deletions allennlp_models/vision/dataset_readers/vgqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ def _read(self, file_path: str):
questions = questions[question_slice]

question_dicts = list(self.shard_iterable(questions))
processed_images: Iterable[Optional[Tuple[Tensor, Tensor]]]
processed_images: Iterable[
Optional[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]
]
if self.produce_featurized_images:
# It would be much easier to just process one image at a time, but it's faster to process
# them in batches. So this code gathers up instances until it has enough to fill up a batch
Expand Down Expand Up @@ -196,7 +198,7 @@ def text_to_instance(
qa_id: int,
question: str,
answer: Optional[str],
image: Union[str, Tuple[Tensor, Tensor]],
image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
use_cache: bool = True,
keep_impossible_questions: bool = True,
) -> Optional[Instance]:
Expand All @@ -207,9 +209,9 @@ def text_to_instance(
}

if isinstance(image, str):
features, coords = next(self._process_image_paths([image], use_cache=use_cache))
features, coords, _, _ = next(self._process_image_paths([image], use_cache=use_cache))
else:
features, coords = image
features, coords, _, _ = image

fields["box_features"] = ArrayField(features)
fields["box_coordinates"] = ArrayField(coords)
Expand Down
98 changes: 74 additions & 24 deletions allennlp_models/vision/dataset_readers/vision_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ def __init__(
# feature cache
self.feature_cache_dir = feature_cache_dir
self.coordinates_cache_dir = feature_cache_dir
self.class_probs_cache_dir = feature_cache_dir
self.class_labels_cache_dir = feature_cache_dir

if feature_cache_dir:
self.write_to_cache = write_to_cache
else:
Expand All @@ -130,6 +133,8 @@ def __init__(
self.write_to_cache = True
self._feature_cache_instance: Optional[MutableMapping[str, Tensor]] = None
self._coordinates_cache_instance: Optional[MutableMapping[str, Tensor]] = None
self._class_probs_cache_instance: Optional[MutableMapping[str, Tensor]] = None
self._class_labels_cache_instance: Optional[MutableMapping[str, Tensor]] = None

# image processors
self.image_loader = None
Expand Down Expand Up @@ -206,37 +211,52 @@ def region_detector(self) -> Optional[RegionDetector]:
self._region_detector.eval() # type: ignore[attr-defined]
return self._region_detector # type: ignore[return-value]

def _create_cache(
    self,
    cache_name: str,
    cache_dir: Optional[Union[str, PathLike]] = None,
) -> MutableMapping[str, Tensor]:
    """
    Builds the mapping that backs one of the reader's tensor caches.

    # Parameters

    cache_name : `str`
        Name of the cache inside `cache_dir`, e.g. `"features"`.
    cache_dir : `Optional[Union[str, PathLike]]`, optional (default = `None`)
        Directory holding the on-disk cache. It is created if it does not exist yet.

    # Returns

    `MutableMapping[str, Tensor]`
        An empty in-memory `dict` when no cache directory is given, otherwise a
        `TensorCache` at `cache_dir/cache_name`, opened read-only unless
        `self.write_to_cache` is set.
    """
    # The constructor treats every falsy `feature_cache_dir` (not only `None`) as
    # "no cache directory", so do the same here. Otherwise an empty string would
    # reach `os.makedirs("")`, which raises `FileNotFoundError`.
    if not cache_dir:
        return {}
    os.makedirs(cache_dir, exist_ok=True)
    return TensorCache(
        os.path.join(cache_dir, cache_name),
        read_only=not self.write_to_cache,
    )

@property
def _feature_cache(self) -> MutableMapping[str, Tensor]:
if self._feature_cache_instance is None:
if self.feature_cache_dir is None:
self._feature_cache_instance = {}
else:
os.makedirs(self.feature_cache_dir, exist_ok=True)
self._feature_cache_instance = TensorCache(
os.path.join(self.feature_cache_dir, "features"),
read_only=not self.write_to_cache,
)

self._feature_cache_instance = self._create_cache("features", self.feature_cache_dir)
return self._feature_cache_instance

@property
def _coordinates_cache(self) -> MutableMapping[str, Tensor]:
if self._coordinates_cache_instance is None:
if self.coordinates_cache_dir is None:
self._coordinates_cache_instance = {}
else:
os.makedirs(self.feature_cache_dir, exist_ok=True) # type: ignore
self._coordinates_cache_instance = TensorCache(
os.path.join(self.feature_cache_dir, "coordinates"), # type: ignore
read_only=not self.write_to_cache,
)

self._coordinates_cache_instance = self._create_cache(
"coordinates", self.coordinates_cache_dir
)
return self._coordinates_cache_instance

@property
def _class_probs_cache(self) -> MutableMapping[str, Tensor]:
    """
    The cache for class probabilities, created the first time it is accessed.
    """
    cache = self._class_probs_cache_instance
    if cache is None:
        # First access: build the cache and remember it for later lookups.
        cache = self._create_cache("class_probs", self.class_probs_cache_dir)
        self._class_probs_cache_instance = cache
    return cache

@property
def _class_labels_cache(self) -> MutableMapping[str, Tensor]:
    """
    The cache for class labels, created the first time it is accessed.
    """
    cache = self._class_labels_cache_instance
    if cache is None:
        # First access: build the cache and remember it for later lookups.
        cache = self._create_cache("class_labels", self.class_labels_cache_dir)
        self._class_labels_cache_instance = cache
    return cache

def _process_image_paths(
self, image_paths: Iterable[str], *, use_cache: bool = True
) -> Iterator[Tuple[Tensor, Tensor]]:
) -> Iterator[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]:
"""
Processes the given image paths and returns featurized images.
Expand All @@ -258,7 +278,7 @@ def _process_image_paths(
"an image featurizer, and a region detector."
)

batch: List[Union[str, Tuple[Tensor, Tensor]]] = []
batch: List[Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]] = []
unprocessed_paths: Set[str] = set()

def yield_batch():
Expand All @@ -272,16 +292,44 @@ def yield_batch():
detector_results = self.region_detector(images, sizes, featurized_images)
features = detector_results.features
coordinates = detector_results.boxes
class_probs = detector_results.class_probs
class_labels = detector_results.class_labels

# store the processed results in memory, so we can complete the batch
paths_to_tensors = {path: (features[i], coordinates[i]) for i, path in enumerate(paths)}
paths_to_tensors = {}
for i, path in enumerate(paths):
if class_probs:
class_probs_tensor = class_probs[i]
else:
class_probs_tensor = None

if class_labels:
class_labels_tensor = class_labels[i]
else:
class_labels_tensor = None

paths_to_tensors[path] = (
features[i],
coordinates[i],
class_probs_tensor,
class_labels_tensor,
)

# store the processed results in the cache
if use_cache and self.write_to_cache:
for path, (features, coordinates) in paths_to_tensors.items():
for path, (
features,
coordinates,
class_probs,
class_labels,
) in paths_to_tensors.items():
basename = os.path.basename(path)
self._feature_cache[basename] = features
self._coordinates_cache[basename] = coordinates
if class_probs is not None:
self._class_probs_cache[basename] = class_probs
if class_labels is not None:
self._class_labels_cache[basename] = class_labels

# yield the batch
for b in batch:
Expand All @@ -296,10 +344,12 @@ def yield_batch():
if use_cache:
features: Tensor = self._feature_cache[basename]
coordinates: Tensor = self._coordinates_cache[basename]
class_probs: Optional[Tensor] = self._class_probs_cache.get(basename)
class_labels: Optional[Tensor] = self._class_labels_cache.get(basename)
if len(batch) <= 0:
yield features, coordinates
yield features, coordinates, class_probs, class_labels
else:
batch.append((features, coordinates))
batch.append((features, coordinates, class_probs, class_labels))
else:
# If we're not using the cache, we pretend we had a cache miss here.
raise KeyError
Expand Down
8 changes: 5 additions & 3 deletions allennlp_models/vision/dataset_readers/visual_entailment.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _read(self, file_path: str):
@overrides
def text_to_instance(
self, # type: ignore
image: Union[str, Tuple[Tensor, Tensor]],
image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
hypothesis: str,
label: Optional[str] = None,
*,
Expand All @@ -90,9 +90,11 @@ def text_to_instance(

if image is not None:
if isinstance(image, str):
features, coords = next(self._process_image_paths([image], use_cache=use_cache))
features, coords, _, _ = next(
self._process_image_paths([image], use_cache=use_cache)
)
else:
features, coords = image
features, coords, _, _ = image

fields["box_features"] = ArrayField(features)
fields["box_coordinates"] = ArrayField(coords)
Expand Down
12 changes: 8 additions & 4 deletions allennlp_models/vision/dataset_readers/vqav2.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,9 @@ class Split(NamedTuple):
questions = questions[question_slice]

question_dicts = list(self.shard_iterable(questions))
processed_images: Iterable[Optional[Tuple[Tensor, Tensor]]]
processed_images: Iterable[
Optional[Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]]
]
if self.produce_featurized_images:
# It would be much easier to just process one image at a time, but it's faster to process
# them in batches. So this code gathers up instances until it has enough to fill up a batch
Expand Down Expand Up @@ -279,7 +281,7 @@ class Split(NamedTuple):
def text_to_instance(
self, # type: ignore
question: str,
image: Union[str, Tuple[Tensor, Tensor]],
image: Union[str, Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]],
answer_counts: Optional[MutableMapping[str, int]] = None,
*,
use_cache: bool = True,
Expand All @@ -293,9 +295,11 @@ def text_to_instance(

if image is not None:
if isinstance(image, str):
features, coords = next(self._process_image_paths([image], use_cache=use_cache))
features, coords, _, _ = next(
self._process_image_paths([image], use_cache=use_cache)
)
else:
features, coords = image
features, coords, _, _ = image

fields["box_features"] = ArrayField(features)
fields["box_coordinates"] = ArrayField(coords)
Expand Down

0 comments on commit 4eb7c27

Please sign in to comment.