Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
ece53c1
Modify group_images_into_events to read new images and merge into exi…
mohamedelabbas1996 Jul 22, 2025
76a4cb4
Refactor group_images_into_events to simplify logic and reduce code d…
mohamedelabbas1996 Jul 23, 2025
9c9a216
Add tests for image grouping
mohamedelabbas1996 Jul 23, 2025
5249e77
Remove unused group_by field from Event model
mohamedelabbas1996 Jul 23, 2025
1b2d2a6
Removed _create_captures_from_url
mohamedelabbas1996 Jul 23, 2025
660e883
Merge branch 'main' into fix/group-images-into-sessions
mohamedelabbas1996 Jul 23, 2025
0bbcfdd
Added missing import
mohamedelabbas1996 Jul 23, 2025
4fb045a
feat: action to remove images and occurrences from events
mihow Jul 24, 2025
7a4949c
feat: clear images and occurrences from events from event model
mihow Jul 24, 2025
a9213ee
fix: update count in logs
mihow Jul 24, 2025
7cb4c6e
Refactor group_images_into_events: removed unnecessary checks and que…
mohamedelabbas1996 Jul 24, 2025
706c560
Merge branch 'fix/group-images-into-sessions' of https://github.com/R…
mohamedelabbas1996 Jul 24, 2025
9bd037f
fix: allow the group_by removal migration to be reversed
mihow Jul 24, 2025
05ce90c
Merge branch 'fix/group-images-into-sessions' of github.com:RolnickLa…
mihow Jul 24, 2025
e0fcadc
test: add tests for full regrouping with updated time gap and new ove…
mohamedelabbas1996 Jul 25, 2025
aeee536
Merge branch 'fix/group-images-into-sessions' of https://github.com/R…
mohamedelabbas1996 Jul 25, 2025
5d8df37
Add admin action to fix sessions by regrouping images per deployment
mohamedelabbas1996 Jul 25, 2025
67ff8b1
feat: clean up when events are regrouped for a deployment
mihow Jul 25, 2025
ebcf866
Handle merging with following overlapping events
mohamedelabbas1996 Jul 25, 2025
bb14620
Merge branch 'fix/group-images-into-sessions' of https://github.com/R…
mohamedelabbas1996 Jul 25, 2025
5465b4d
Tried fixing broken tests
mohamedelabbas1996 Jul 25, 2025
20ed64c
Updated fix_sessions admin action to use dissociate_related_objects
mohamedelabbas1996 Jul 25, 2025
21e39eb
Updated fix_sessions admin action group_images_into_events to use_exi…
mohamedelabbas1996 Jul 25, 2025
a800802
Omit redundant use_existing=True from group_images_into_events call
mohamedelabbas1996 Aug 5, 2025
931a686
Set use_existing=False to ensure all images are regrouped, not just o…
mohamedelabbas1996 Aug 5, 2025
541f26b
Merge branch 'main' of github.com:RolnickLab/antenna into fix/group-i…
mihow Aug 29, 2025
9ecf60c
Merge branch 'main' of github.com:RolnickLab/antenna into fix/group-i…
mihow Aug 29, 2025
f1ab1b4
chore: rebase migration
mihow Aug 29, 2025
3583da5
fix: ignore existing events when testing grouping
mihow Aug 29, 2025
21afc3c
chore: add type hint
mihow Aug 29, 2025
1b46b35
Add assertions to ensure event.end is not None during grouping
mohamedelabbas1996 Aug 29, 2025
9808407
feat: management command to audit and update event groups
mihow Sep 2, 2025
8ec6671
fix: delete empty events AFTER occurrences are updated
mihow Sep 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions ami/main/migrations/0061_remove_event_unique_event_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 4.2.10 on 2025-07-23 05:15

from django.db import migrations, models


class Migration(migrations.Migration):
    # Drops the legacy Event.group_by field and re-keys event uniqueness:
    # events become unique per (deployment, start, end) rather than per
    # (deployment, group_by).
    dependencies = [
        ("main", "0060_alter_sourceimagecollection_method"),
    ]

    operations = [
        # Remove the old (deployment, group_by) uniqueness constraint first,
        # because it references the field being dropped below.
        migrations.RemoveConstraint(
            model_name="event",
            name="unique_event",
        ),
        # Remove the standalone index on group_by (Django auto-generated name).
        migrations.RemoveIndex(
            model_name="event",
            name="main_event_group_b_6ce666_idx",
        ),
        # Drop the group_by column itself.
        migrations.RemoveField(
            model_name="event",
            name="group_by",
        ),
        # Recreate the "unique_event" constraint keyed on the event's time
        # span instead of the removed group_by value.
        migrations.AddConstraint(
            model_name="event",
            constraint=models.UniqueConstraint(fields=("deployment", "start", "end"), name="unique_event"),
        ),
    ]
138 changes: 74 additions & 64 deletions ami/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,18 +785,6 @@ def save(self, update_calculated_fields=True, *args, **kwargs):
class Event(BaseModel):
"""A monitoring session"""

group_by = models.CharField(
max_length=255,
db_index=True,
help_text=(
"A unique identifier for this event, used to group images into events. "
"This allows images to be prepended or appended to an existing event. "
"The default value is the day the event started, in the format YYYY-MM-DD. "
"However images could also be grouped by camera settings, image dimensions, hour of day, "
"or a random sample."
),
)

start = models.DateTimeField(db_index=True, help_text="The timestamp of the first image in the event.")
end = models.DateTimeField(null=True, blank=True, help_text="The timestamp of the last image in the event.")

Expand All @@ -815,11 +803,10 @@ class Event(BaseModel):
class Meta:
ordering = ["start"]
indexes = [
models.Index(fields=["group_by"]),
models.Index(fields=["start"]),
]
constraints = [
models.UniqueConstraint(fields=["deployment", "group_by"], name="unique_event"),
models.UniqueConstraint(fields=["deployment", "start", "end"], name="unique_event"),
]

def __str__(self) -> str:
Expand Down Expand Up @@ -918,10 +905,6 @@ def update_calculated_fields(self, save=False, updated_timestamp: datetime.datet
Important: if you update a new field, add it to the bulk_update call in update_calculated_fields_for_events
"""
event = self
if not event.group_by and event.start:
# If no group_by is set, use the start "day"
event.group_by = str(event.start.date())

if not event.project and event.deployment:
event.project = event.deployment.project

Expand Down Expand Up @@ -982,7 +965,6 @@ def update_calculated_fields_for_events(
updated_count = Event.objects.bulk_update(
to_update,
[
"group_by",
"start",
"end",
"project",
Expand All @@ -998,9 +980,14 @@ def update_calculated_fields_for_events(


def group_images_into_events(
deployment: Deployment, max_time_gap=datetime.timedelta(minutes=120), delete_empty=True
deployment: Deployment,
max_time_gap=datetime.timedelta(minutes=120),
delete_empty=True,
use_existing=True,
) -> list[Event]:
# Log a warning if multiple SourceImages have the same timestamp
logger.info(f"Grouping images into events for deployment '{deployment}' (use_existing={use_existing})")

# Log duplicate timestamps
dupes = (
SourceImage.objects.filter(deployment=deployment)
.values("timestamp")
Expand All @@ -1016,60 +1003,82 @@ def group_images_into_events(
f"Found {len(values)} images with the same timestamp in deployment '{deployment}'. "
f"Only one image will be used for each timestamp for each event."
)
# Get all images
image_qs = SourceImage.objects.filter(deployment=deployment).exclude(timestamp=None)
if use_existing:
# Get only newly added images (images without an event)
image_qs = image_qs.filter(event__isnull=True)

images = list(image_qs.order_by("timestamp"))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You likely don't have to evaluate the queryset yet with list(image_qs). You can check if images are found with image_qs.exists(), which is efficient for large datasets.

if not images:
logger.info("No relevant images found; skipping")
return []

image_timestamps = list(
SourceImage.objects.filter(deployment=deployment)
.exclude(timestamp=None)
.values_list("timestamp", flat=True)
.order_by("timestamp")
.distinct()
)

timestamp_groups = ami.utils.dates.group_datetimes_by_gap(image_timestamps, max_time_gap)
# @TODO this event grouping needs testing. Still getting events over 24 hours
# timestamp_groups = ami.utils.dates.group_datetimes_by_shifted_day(image_timestamps)
# Group timestamps
timestamps = list(image_qs.order_by("timestamp").values_list("timestamp", flat=True).distinct())
timestamp_groups = ami.utils.dates.group_datetimes_by_gap(timestamps, max_time_gap)

existing_events = list(Event.objects.filter(deployment=deployment))
events = []

# For each group of images check if we can merge with an existing
# event based on overlapping or proximity if use_existing is True.
# Otherwise, if use_existing is False, we look for an existing event
# with the exact same start and end times and reuse it,
# if not found create a new event.
Copy link

Copilot AI Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing word in comment. Should be 'if none is found, create a new event.' or 'if not found, create a new event.'

Suggested change
# if not found create a new event.
# if not found, create a new event.

Copilot uses AI. Check for mistakes.
for group in timestamp_groups:
if not len(group):
continue

start_date = group[0]
end_date = group[-1]

# Print debugging info about groups
delta = end_date - start_date
hours = round(delta.seconds / 60 / 60, 1)
logger.debug(
f"Found session starting at {start_date} with {len(group)} images that ran for {hours} hours.\n"
f"From {start_date.strftime('%c')} to {end_date.strftime('%c')}."
)
group_start, group_end = group[0], group[-1]
group_set = set(group)
group_image_ids = [img.pk for img in images if img.timestamp in group_set]
Copy link

Copilot AI Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This list comprehension creates a new list for each group and checks membership in group_set for every image. Consider creating a mapping of timestamps to image IDs beforehand to improve performance when processing multiple groups.

Suggested change
for group in timestamp_groups:
if not len(group):
continue
start_date = group[0]
end_date = group[-1]
# Print debugging info about groups
delta = end_date - start_date
hours = round(delta.seconds / 60 / 60, 1)
logger.debug(
f"Found session starting at {start_date} with {len(group)} images that ran for {hours} hours.\n"
f"From {start_date.strftime('%c')} to {end_date.strftime('%c')}."
)
group_start, group_end = group[0], group[-1]
group_set = set(group)
group_image_ids = [img.pk for img in images if img.timestamp in group_set]
# Precompute a mapping of timestamps to image IDs
timestamp_to_image_ids = collections.defaultdict(list)
for img in images:
timestamp_to_image_ids[img.timestamp].append(img.pk)
for group in timestamp_groups:
group_start, group_end = group[0], group[-1]
group_image_ids = []
for timestamp in group:
group_image_ids.extend(timestamp_to_image_ids.get(timestamp, []))

Copilot uses AI. Check for mistakes.

event = None
if use_existing:
# Look for overlap or proximity
for existing_event in existing_events:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are looping over a queryset, you can do for existing_event in events_qs, which supposedly avoids loading the whole queryset result into memory. Sometimes you need to convert to a list so you can index the list like events[3], but often you never need to convert to a list.

existing_event.refresh_from_db(fields=["start", "end"])
Copy link

Copilot AI Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Database refresh is called for every existing event in every group iteration. Consider fetching fresh event data once before the loop or only refresh when actually needed after a merge.

Copilot uses AI. Check for mistakes.
overlaps = group_start <= existing_event.end and group_end >= existing_event.start
close_enough = (
abs(group_start - existing_event.end) <= max_time_gap
or abs(existing_event.start - group_end) <= max_time_gap
)

# Creating events & assigning images
group_by = start_date.date()
event, _ = Event.objects.get_or_create(
deployment=deployment,
group_by=group_by,
defaults={"start": start_date, "end": end_date},
)
events.append(event)
SourceImage.objects.filter(deployment=deployment, timestamp__in=group).update(event=event)
event.save() # Update start and end times and other cached fields
logger.info(
f"Created/updated event {event} with {len(group)} images for deployment {deployment}. "
f"Duration: {event.duration_label()}"
)
if overlaps or close_enough:
event = existing_event
break
else:
# Look for exact match
event = Event.objects.filter(
deployment=deployment,
start=group_start,
end=group_end,
).first()

if event:
if use_existing:
# Adjust times if necessary (merge)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this check necessary? I think you just checked whether an existing event has the exact start & end time. Perhaps you meant to do an OR query? If an existing event has either the same start or end time as the group.

If there is an existing event with exactly the same start AND end time (for same deployment), then I don't think we should check for use_existing. Just re-use those without question.

if group_start < event.start or group_end > event.end:
event.start = min(event.start, group_start)
event.end = max(event.end, group_end)
logger.info(f"{'Merged' if use_existing else 'Reused'} event {event} for {len(group_image_ids)} images")
else:
# Create new event
event = Event.objects.create(
deployment=deployment,
start=group_start,
end=group_end,
)
logger.info(f"Created new event {event} with {len(group_image_ids)} images")

logger.info(
f"Done grouping {len(image_timestamps)} captures into {len(events)} events " f"for deployment {deployment}"
)
SourceImage.objects.filter(id__in=group_image_ids).update(event=event)
event.save()
events.append(event)

# Final processing
if delete_empty:
logger.info("Deleting empty events for deployment")
delete_empty_events(deployment=deployment)

for event in events:
# Set the width and height of all images in each event based on the first image
logger.info(f"Setting image dimensions for event {event}")
set_dimensions_for_collection(event)

Expand All @@ -1088,9 +1097,10 @@ def group_images_into_events(
)

logger.info("Updating relevant cached fields on deployment")
deployment.events_count = len(events)
deployment.events_count = Event.objects.filter(deployment=deployment).count()
deployment.save(update_calculated_fields=False, update_fields=["events_count"])

logger.info(f"Finished grouping {len(timestamps)} images into {len(events)} events for deployment '{deployment}'")
return events


Expand Down
Loading