Skip to content

Commit fca525f

Browse files
Abdullah AlaqeelMercymeIlya
authored andcommitted
feat: give users the option to run the json migration asyncly (jazzband#495)
1 parent 73800bb commit fca525f

File tree

9 files changed

+359
-12
lines changed

9 files changed

+359
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
#### Breaking Changes
66

7-
- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407))
7+
- feat: Change `LogEntry.changes` field type to `JSONField` rather than `TextField`. This change includes a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407)) ([#495](https://github.com/jazzband/django-auditlog/pull/495))
88
- feat: stop deleting old log entries when a model with the same pk is created (i.e. the pk value is reused) ([#559](https://github.com/jazzband/django-auditlog/pull/559))
99
- feat: Set `AuditlogHistoryField.delete_related` to `False` by default. This is different from the default configuration of Django's `GenericRelation`, but we should not erase the audit log of objects on deletion by default. ([#557](https://github.com/jazzband/django-auditlog/pull/557))
1010
- Python: Drop support for Python 3.7 ([#546](https://github.com/jazzband/django-auditlog/pull/546))

auditlog/apps.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ def ready(self):
1111
from auditlog.registry import auditlog
1212

1313
auditlog.register_from_settings()
14+
15+
from auditlog import models
16+
17+
models.changes_func = models._changes_func()

auditlog/conf.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,11 @@
3232
settings, "AUDITLOG_CID_HEADER", "x-correlation-id"
3333
)
3434
settings.AUDITLOG_CID_GETTER = getattr(settings, "AUDITLOG_CID_GETTER", None)
35+
36+
# migration
37+
settings.AUDITLOG_TWO_STEP_MIGRATION = getattr(
38+
settings, "AUDITLOG_TWO_STEP_MIGRATION", False
39+
)
40+
settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = getattr(
41+
settings, "AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT", False
42+
)
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
from math import ceil
2+
3+
from django.conf import settings
4+
from django.core.management.base import BaseCommand
5+
6+
from auditlog.models import LogEntry
7+
8+
9+
class Command(BaseCommand):
    """Copy legacy ``changes_text`` values into the JSON ``changes`` field.

    Only log entries that have a non-empty ``changes_text`` and no
    ``changes`` value yet are considered pending (see ``get_logs``). The
    copy is done either through the ORM (optionally in batches) or, when
    ``-d/--database`` is given, through a single native SQL statement.
    """

    help = "Migrates changes from changes_text to json changes."

    def add_arguments(self, parser):
        """Register the ``-d/--database`` and ``-b/--batch-size`` options."""
        parser.add_argument(
            "-d",
            "--database",
            default=None,
            help="If provided, the script will use native db operations. "
            "Otherwise, it will use LogEntry.objects.bulk_update",
            dest="db",
            type=str,
            choices=["postgres", "mysql", "oracle"],
        )
        parser.add_argument(
            "-b",
            "--batch-size",
            # The option was first released with this misspelling; it is kept
            # as an alias so existing invocations keep working.
            "--bactch-size",
            default=500,
            help="Split the migration into multiple batches. If 0, then no batching will be done. "
            "When passing a -d/database, the batch value will be ignored.",
            dest="batch_size",
            type=int,
        )

    def handle(self, *args, **options):
        """Run the migration and report progress on stdout."""
        database = options["db"]
        batch_size = options["batch_size"]

        # Nothing pending: check_logs() has already printed the status.
        if not self.check_logs():
            return

        if database:
            result = self.migrate_using_sql(database)
            self.stdout.write(
                f"Updated {result} records using native database operations."
            )
        else:
            result = self.migrate_using_django(batch_size)
            self.stdout.write(f"Updated {result} records using django operations.")

        # Report what is left (e.g. rows whose text could not be parsed).
        self.check_logs()

    def check_logs(self):
        """Print how many entries still need migrating.

        :return: ``True`` if at least one entry is pending, ``False`` otherwise.
        """
        count = self.get_logs().count()
        if count:
            self.stdout.write(f"There are {count} records that needs migration.")
            return True

        self.stdout.write("All records have been migrated.")
        if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT:
            self.stdout.write(
                "You can now set AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT to False."
            )

        return False

    def get_logs(self):
        """Return the queryset of entries that still need migrating.

        An entry is pending when ``changes_text`` is neither NULL nor empty
        and ``changes`` is NULL.
        """
        return LogEntry.objects.filter(
            changes_text__isnull=False, changes__isnull=True
        ).exclude(changes_text__exact="")

    def _migrate_batch(self, logs) -> int:
        """Parse ``changes_text`` of each entry in *logs* and save it to ``changes``.

        Entries whose text is not valid JSON are reported on stderr and left
        untouched.

        :return: The number of entries that were updated.
        """
        import json

        updated = []
        for log in logs:
            try:
                log.changes = json.loads(log.changes_text)
            except ValueError:
                self.stderr.write(
                    f"ValueError was raised while migrating the log with id {log.id}."
                )
            else:
                updated.append(log)

        LogEntry.objects.bulk_update(updated, fields=["changes"])
        return len(updated)

    def migrate_using_django(self, batch_size):
        """Migrate pending entries through the ORM.

        :param batch_size: Maximum number of entries loaded and updated per
            round; ``0`` processes every pending entry in one go.
        :return: The total number of entries that were updated.
        """
        if batch_size < 0:
            self.stderr.write(
                "The batch size must be 0 (no batching) or a positive number."
            )
            return 0

        if not batch_size:
            return self._migrate_batch(self.get_logs())

        # Walk the pending rows in primary-key order and resume after the last
        # row seen. Rows that fail to parse stay pending; always re-slicing
        # the head of the queryset would fetch them again on every round and
        # could keep later rows from ever being reached.
        pending = self.get_logs().order_by("pk")
        total_updated = 0
        last_pk = None
        for _ in range(ceil(pending.count() / batch_size)):
            remaining = pending if last_pk is None else pending.filter(pk__gt=last_pk)
            batch = list(remaining[:batch_size])
            if not batch:
                break
            total_updated += self._migrate_batch(batch)
            last_pk = batch[-1].pk
        return total_updated

    def migrate_using_sql(self, database):
        """Migrate pending entries with a native SQL statement.

        Only ``"postgres"`` is implemented; for any other value a message is
        written to stderr and ``0`` is returned.

        :param database: The database flavour chosen with ``-d/--database``.
        :return: The row count reported by the database for the update.
        """
        from django.db import connection

        def postgres():
            with connection.cursor() as cursor:
                # Restrict the update to the rows get_logs() selects, so rows
                # that already hold JSON are not rewritten and empty strings
                # are never cast to jsonb.
                cursor.execute(
                    'UPDATE auditlog_logentry SET changes="changes_text"::jsonb '
                    "WHERE changes_text IS NOT NULL "
                    "AND changes_text <> '' "
                    "AND changes IS NULL"
                )
                return cursor.cursor.rowcount

        if database == "postgres":
            return postgres()
        else:
            self.stderr.write(
                "Not yet implemented. Run this management command without passing a -d/--database argument."
            )
            return 0
Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,43 @@
11
# Generated by Django 4.0 on 2022-08-04 15:41
2+
from typing import List
23

4+
from django.conf import settings
35
from django.db import migrations, models
46

57

6-
class Migration(migrations.Migration):
7-
8-
dependencies = [
9-
("auditlog", "0016_logentry_cid"),
10-
]
8+
def two_step_migrations() -> List:
    """Build the operations for this migration from the project settings.

    With ``AUDITLOG_TWO_STEP_MIGRATION`` enabled, the existing ``changes``
    field is renamed to ``changes_text`` and a new nullable JSON ``changes``
    field is added. Otherwise a ``changes_text`` text field is added and
    ``changes`` is altered in place to a nullable ``JSONField``.

    :return: The list of migration operations to apply.
    """
    # Both variants end up with the same JSON definition for ``changes``.
    json_changes = models.JSONField(null=True, verbose_name="change message")

    if not settings.AUDITLOG_TWO_STEP_MIGRATION:
        text_changes = models.TextField(
            blank=True, verbose_name="text change message"
        )
        return [
            migrations.AddField(
                model_name="logentry",
                name="changes_text",
                field=text_changes,
            ),
            migrations.AlterField(
                model_name="logentry",
                name="changes",
                field=json_changes,
            ),
        ]

    return [
        migrations.RenameField(
            model_name="logentry",
            old_name="changes",
            new_name="changes_text",
        ),
        migrations.AddField(
            model_name="logentry",
            name="changes",
            field=json_changes,
        ),
    ]
35+
36+
37+
class Migration(migrations.Migration):
    """Switch ``LogEntry.changes`` to JSON, keeping the text in ``changes_text``."""

    dependencies = [
        ("auditlog", "0016_logentry_cid"),
    ]

    # The concrete operations are chosen by two_step_migrations().
    operations = list(two_step_migrations())

auditlog/models.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import ast
2+
import contextlib
23
import json
34
from copy import deepcopy
45
from datetime import timezone
5-
from typing import Any, Dict, List
6+
from typing import Any, Callable, Dict, List
67

78
from dateutil import parser
89
from dateutil.tz import gettz
@@ -201,7 +202,7 @@ def _get_pk_value(self, instance):
201202
pk_field = instance._meta.pk.name
202203
pk = getattr(instance, pk_field, None)
203204

204-
# Check to make sure that we got an pk not a model object.
205+
# Check to make sure that we got a pk not a model object.
205206
if isinstance(pk, models.Model):
206207
pk = self._get_pk_value(pk)
207208
return pk
@@ -334,6 +335,7 @@ class Action:
334335
action = models.PositiveSmallIntegerField(
335336
choices=Action.choices, verbose_name=_("action"), db_index=True
336337
)
338+
changes_text = models.TextField(blank=True, verbose_name=_("change message"))
337339
changes = models.JSONField(null=True, verbose_name=_("change message"))
338340
actor = models.ForeignKey(
339341
to=settings.AUTH_USER_MODEL,
@@ -388,7 +390,7 @@ def changes_dict(self):
388390
"""
389391
:return: The changes recorded in this log entry as a dictionary object.
390392
"""
391-
return self.changes or {}
393+
return changes_func(self)
392394

393395
@property
394396
def changes_str(self, colon=": ", arrow=" \u2192 ", separator="; "):
@@ -436,7 +438,7 @@ def changes_display_dict(self):
436438
changes_display_dict[field_name] = values
437439
continue
438440
values_display = []
439-
# handle choices fields and Postgres ArrayField to get human readable version
441+
# handle choices fields and Postgres ArrayField to get human-readable version
440442
choices_dict = None
441443
if getattr(field, "choices", []):
442444
choices_dict = dict(field.choices)
@@ -495,7 +497,7 @@ class AuditlogHistoryField(GenericRelation):
495497
A subclass of py:class:`django.contrib.contenttypes.fields.GenericRelation` that sets some default
496498
variables. This makes it easier to access Auditlog's log entries, for example in templates.
497499
498-
By default this field will assume that your primary keys are numeric, simply because this is the most
500+
By default, this field will assume that your primary keys are numeric, simply because this is the most
499501
common case. However, if you have a non-integer primary key, you can simply pass ``pk_indexable=False``
500502
to the constructor, and Auditlog will fall back to using a non-indexed text based field for this model.
501503
@@ -532,3 +534,24 @@ def bulk_related_objects(self, objs, using=DEFAULT_DB_ALIAS):
532534
# method. However, because we don't want to delete these related
533535
# objects, we simply return an empty list.
534536
return []
537+
538+
539+
# TODO: consider adding a receiver for the `setting_changed` signal so that `changes_func` is refreshed when settings change.
540+
changes_func = None
541+
542+
543+
def _changes_func() -> Callable[[LogEntry], Dict]:
    """Choose the function used to read a log entry's changes as a dict.

    When ``AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT`` is enabled, the
    returned function falls back to parsing ``changes_text`` for entries
    without a JSON ``changes`` value; otherwise only ``changes`` is read.

    :return: A callable that takes a ``LogEntry`` and returns its changes.
    """

    def json_then_text(instance: LogEntry) -> Dict:
        # Prefer the JSON field whenever it holds something.
        if instance.changes:
            return instance.changes
        if not instance.changes_text:
            return {}
        try:
            return json.loads(instance.changes_text)
        except ValueError:
            # Text that is not valid JSON is treated as "no changes".
            return {}

    def default(instance: LogEntry) -> Dict:
        return instance.changes or {}

    use_text_fallback = settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT
    return json_then_text if use_text_fallback else default

0 commit comments

Comments
 (0)