Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ability to click on the affected URLs of a pattern and see which URLs have been affected #1038

Open
wants to merge 26 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
933852b
Add 'Show Affected URLs' button and configure click handler
Kirandawadi Sep 17, 2024
b9d2385
Add necessary files for 'Affected URLs' page
Kirandawadi Sep 17, 2024
e89eaa1
Add DataTables features to the page
Kirandawadi Sep 19, 2024
194929a
Affected URLs page for excluded patterns working properly
Kirandawadi Sep 23, 2024
b15410e
add view affected urls button to title patterns and document-type pat…
Kirandawadi Sep 25, 2024
7a279fc
finalise the affected urls page for all the pattern types
Kirandawadi Sep 25, 2024
e736d2c
Refactor AffectedURLsListView into pattern-specific views
Kirandawadi Sep 26, 2024
242d414
remove unnecessary comments and format code
Kirandawadi Sep 26, 2024
32ed7f8
add integrity check to contents loaded from CDN
Kirandawadi Sep 26, 2024
74e074a
remove unnecessary css declarations
Kirandawadi Sep 26, 2024
dad63b0
remove ordering functionality from second row of the datatable
Kirandawadi Sep 27, 2024
11959fc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 27, 2024
e68b50e
fix issues shown by flake8
Kirandawadi Sep 27, 2024
c545b8c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 27, 2024
4205c1d
added include-pattern functionality; fixed candidate-urls-api (inclus…
Kirandawadi Sep 28, 2024
42cdda0
remove 'add pattern' button from unnecessary pages
Kirandawadi Oct 10, 2024
6b21213
add serializer class to affected urls functionalities
Kirandawadi Oct 11, 2024
9d666b2
add datatables reload() feature to skip unnecessary ui modifications
Kirandawadi Oct 11, 2024
6ec4943
fix ajax issues of datatable
Kirandawadi Oct 12, 2024
37bdb7c
add 'include pattern' feature on right mouse click
Kirandawadi Oct 19, 2024
1056ac9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 21, 2024
525e6a2
fix flake8 issues
Kirandawadi Oct 21, 2024
597252c
add checks to see if include pattern already exists
Kirandawadi Oct 21, 2024
4fd628b
fix 'exclude' column of 'Candidate URLs' page
Kirandawadi Oct 25, 2024
1179641
fix 'affected urls' count
Kirandawadi Oct 25, 2024
000153c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions sde_collections/models/candidate_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,24 @@

from .collection import Collection
from .collection_choice_fields import Divisions, DocumentTypes
from .pattern import ExcludePattern, TitlePattern
from .pattern import ExcludePattern, IncludePattern, TitlePattern


class CandidateURLQuerySet(models.QuerySet):
def with_exclusion_status(self):
def with_exclusion_and_inclusion_status(self):
return self.annotate(
excluded=models.Exists(
ExcludePattern.candidate_urls.through.objects.filter(candidateurl=models.OuterRef("pk"))
)
),
included=models.Exists(
IncludePattern.candidate_urls.through.objects.filter(candidateurl=models.OuterRef("pk"))
),
)


class CandidateURLManager(models.Manager):
def get_queryset(self):
return CandidateURLQuerySet(self.model, using=self._db).with_exclusion_status()
return CandidateURLQuerySet(self.model, using=self._db).with_exclusion_and_inclusion_status()


class CandidateURL(models.Model):
Expand Down
97 changes: 97 additions & 0 deletions sde_collections/serializers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from django.db import models
from rest_framework import serializers

from .models.candidate_url import CandidateURL
Expand Down Expand Up @@ -63,6 +64,59 @@ class CandidateURLSerializer(serializers.ModelSerializer):
match_pattern_type = serializers.SerializerMethodField(read_only=True)
candidate_urls_count = serializers.SerializerMethodField(read_only=True)

# New fields for annotated parameters
included = serializers.BooleanField(read_only=True)

def get_candidate_urls_count(self, obj):
titlepattern = obj.titlepattern_urls.last()
return titlepattern.candidate_urls.count() if titlepattern else 0

def get_generated_title_id(self, obj):
titlepattern = obj.titlepattern_urls.last()
return titlepattern.id if titlepattern else None

def get_match_pattern_type(self, obj):
titlepattern = obj.titlepattern_urls.last()
return titlepattern.match_pattern_type if titlepattern else None

class Meta:
model = CandidateURL
fields = (
"id",
"excluded",
"included",
"url",
"scraped_title",
"generated_title",
"generated_title_id",
"match_pattern_type",
"candidate_urls_count",
"document_type",
"document_type_display",
"division",
"division_display",
"visited",
"test_title",
"production_title",
"present_on_test",
"present_on_prod",
)


class AffectedURLSerializer(serializers.ModelSerializer):
excluded = serializers.BooleanField(required=False)
document_type_display = serializers.CharField(source="get_document_type_display", read_only=True)
division_display = serializers.CharField(source="get_division_display", read_only=True)
url = serializers.CharField(required=False)
generated_title_id = serializers.SerializerMethodField(read_only=True)
match_pattern_type = serializers.SerializerMethodField(read_only=True)
candidate_urls_count = serializers.SerializerMethodField(read_only=True)

# New fields for annotated parameters
included = serializers.BooleanField(read_only=True)
included_by_pattern = serializers.CharField(read_only=True)
match_pattern_id = serializers.IntegerField(read_only=True)

def get_candidate_urls_count(self, obj):
titlepattern = obj.titlepattern_urls.last()
return titlepattern.candidate_urls.count() if titlepattern else 0
Expand Down Expand Up @@ -95,6 +149,9 @@ class Meta:
"production_title",
"present_on_test",
"present_on_prod",
"included", # New field
"included_by_pattern", # New field
"match_pattern_id", # New field
)


Expand Down Expand Up @@ -172,6 +229,27 @@ class Meta:
model = ExcludePattern
fields = BasePatternSerializer.Meta.fields + ("reason",)

def get_candidate_urls_count(self, instance):
    """Return how many URLs this exclude pattern effectively excludes.

    The result is the number of candidate URLs matched by ``instance``
    minus the number of those URLs that are also attached to an include
    pattern of the same collection.

    Args:
        instance: The ExcludePattern being serialized.

    Returns:
        int: matched URL count minus re-included URL count.
    """
    matched_urls = instance.candidate_urls
    matched_url_ids = matched_urls.values_list("id", flat=True)

    # Count every re-included URL exactly once. Summing a per-pattern
    # Count() would count a URL once for each include pattern it is attached
    # to, which can push the result below the real value (even negative).
    included_urls_count = (
        IncludePattern.candidate_urls.through.objects.filter(
            includepattern__collection=instance.collection,
            candidateurl__in=matched_url_ids,
        )
        .values("candidateurl")
        .distinct()
        .count()
    )

    return matched_urls.count() - included_urls_count


class IncludePatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
class Meta:
Expand Down Expand Up @@ -245,3 +323,22 @@ def validate_match_pattern(self, value):
except DivisionPattern.DoesNotExist:
pass
return value


class BaseAffectedURLSerializer(serializers.ModelSerializer):
    """Shared serializer fields for a pattern plus its matched-URL count.

    ``Meta`` declares no ``model``; a concrete subclass is expected to
    supply one.
    """

    match_pattern_type_display = serializers.CharField(read_only=True, source="get_match_pattern_type_display")
    candidate_urls_count = serializers.SerializerMethodField(read_only=True)

    class Meta:
        abstract = True
        fields = (
            "id",
            "collection",
            "match_pattern",
            "match_pattern_type",
            "match_pattern_type_display",
            "candidate_urls_count",
        )

    def get_candidate_urls_count(self, instance):
        """Return the number of candidate URLs attached to ``instance``."""
        attached_urls = instance.candidate_urls
        return attached_urls.count()
34 changes: 34 additions & 0 deletions sde_collections/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,20 @@
router.register(r"collections", views.CollectionViewSet, basename="collection")
router.register(r"collections-read", views.CollectionReadViewSet, basename="collection-read")
router.register(r"candidate-urls", views.CandidateURLViewSet)
# API endpoints that list the candidate URLs affected by a single pattern,
# one endpoint per pattern type. Each registration passes an explicit
# basename matching its URL prefix.
router.register(
r"include-pattern-affected-urls", views.IncludePatternAffectedURLsViewSet, basename="include-pattern-affected-urls"
)
router.register(
r"exclude-pattern-affected-urls", views.ExcludePatternAffectedURLsViewSet, basename="exclude-pattern-affected-urls"
)
router.register(
r"title-pattern-affected-urls", views.TitlePatternAffectedURLsViewSet, basename="title-pattern-affected-urls"
)
router.register(
r"documenttype-pattern-affected-urls",
views.DocumentTypePatternAffectedURLsViewSet,
basename="documenttype-pattern-affected-urls",
)
router.register(r"exclude-patterns", views.ExcludePatternViewSet)
router.register(r"include-patterns", views.IncludePatternViewSet)
router.register(r"title-patterns", views.TitlePatternViewSet)
Expand Down Expand Up @@ -43,6 +57,26 @@
view=views.CandidateURLsListView.as_view(),
name="candidate_urls",
),
# "Affected URLs" pages, one route per pattern type; <int:id> is the pattern's id.
# NOTE(review): all four routes below register the same name="affected_urls"
# and take the same single `id` argument, so reversing by that name cannot
# select a specific route. Give each a distinct name if they are ever reversed
# by name -- confirm how templates build these links before renaming.
path(
"exclude-pattern/<int:id>/",
view=views.ExcludePatternAffectedURLsListView.as_view(),
name="affected_urls",
),
path(
"include-pattern/<int:id>/",
view=views.IncludePatternAffectedURLsListView.as_view(),
name="affected_urls",
),
path(
"title-pattern/<int:id>/",
view=views.TitlePatternAffectedURLsListView.as_view(),
name="affected_urls",
),
path(
"document-type-pattern/<int:id>/",
view=views.DocumentTypePatternAffectedURLsListView.as_view(),
name="affected_urls",
),
path(
"consolidate/",
view=views.WebappGitHubConsolidationView.as_view(),
Expand Down
137 changes: 135 additions & 2 deletions sde_collections/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
TitlePattern,
)
from .serializers import (
AffectedURLSerializer,
CandidateURLAPISerializer,
CandidateURLBulkCreateSerializer,
CandidateURLSerializer,
Expand Down Expand Up @@ -226,6 +227,72 @@ def get_context_data(self, **kwargs):
return context


class BaseAffectedURLsListView(LoginRequiredMixin, ListView):
    """
    Base view for displaying a list of URLs affected by a match pattern.

    Subclasses set ``pattern_model`` (the pattern class looked up by the
    ``id`` URL kwarg) and ``pattern_type`` (a label passed to the template).
    """

    model = CandidateURL
    template_name = "sde_collections/affected_urls.html"
    context_object_name = "affected_urls"
    pattern_model = None
    pattern_type = None

    def get_queryset(self):
        """Load the pattern from the URL and return the URLs it matches.

        Stores the pattern on ``self.pattern`` for ``get_context_data``.

        Raises:
            Http404: if no pattern with the requested id exists.
        """
        # Local import keeps this block independent of the module's import list.
        from django.shortcuts import get_object_or_404

        # An unknown id is a client error (404), not a server error.
        self.pattern = get_object_or_404(self.pattern_model, id=self.kwargs["id"])
        return self.pattern.matched_urls()

    def get_context_data(self, **kwargs):
        """Add the pattern, its collection and the matched-URL count to the context."""
        context = super().get_context_data(**kwargs)
        context["pattern"] = self.pattern
        context["pattern_id"] = self.kwargs["id"]
        # Reuse the queryset ListView already built instead of calling
        # get_queryset() again, which would repeat the pattern lookup.
        context["url_count"] = self.object_list.count()
        context["collection"] = self.pattern.collection
        context["pattern_type"] = self.pattern_type
        return context


class ExcludePatternAffectedURLsListView(BaseAffectedURLsListView):
    """Affected-URLs page for an ExcludePattern.

    Each URL is annotated with whether an IncludePattern also covers it and,
    if so, with that include pattern's ``match_pattern`` and id.
    """

    pattern_model = ExcludePattern
    pattern_type = "Exclude"

    def get_queryset(self):
        """Return the matched URLs annotated with include-pattern details."""
        # The base class loads self.pattern and returns its matched URLs.
        queryset = super().get_queryset()

        # Include patterns attached to the outer CandidateURL row. The explicit
        # ordering makes both single-row subqueries below pick the same
        # pattern; without it each [:1] slice could return a different row.
        including_patterns = IncludePattern.objects.filter(candidate_urls=models.OuterRef("pk")).order_by("id")

        return queryset.annotate(
            included=models.Exists(including_patterns),
            included_by_pattern=models.Subquery(
                including_patterns.values("match_pattern")[:1], output_field=models.CharField()
            ),
            match_pattern_id=models.Subquery(including_patterns.values("id")[:1], output_field=models.IntegerField()),
        )


class IncludePatternAffectedURLsListView(BaseAffectedURLsListView):
    """Affected-URLs page for an IncludePattern."""

    pattern_type = "Include"
    pattern_model = IncludePattern


class TitlePatternAffectedURLsListView(BaseAffectedURLsListView):
    """Affected-URLs page for a TitlePattern."""

    pattern_type = "Title"
    pattern_model = TitlePattern


class DocumentTypePatternAffectedURLsListView(BaseAffectedURLsListView):
    """Affected-URLs page for a DocumentTypePattern."""

    pattern_type = "Document Type"
    pattern_model = DocumentTypePattern


class SdeDashboardView(LoginRequiredMixin, ListView):
model = Collection
template_name = "sde_collections/sde_dashboard.html"
Expand Down Expand Up @@ -272,6 +339,18 @@ def get_queryset(self):
is_excluded = self.request.GET.get("is_excluded")
if is_excluded:
queryset = self._filter_by_is_excluded(queryset, is_excluded)

collection_id = self.request.GET.get("collection_id")
if collection_id:
queryset = queryset.annotate(
included=models.Exists(
IncludePattern.candidate_urls.through.objects.filter(
candidateurl=models.OuterRef("pk"),
includepattern__collection_id=collection_id, # Filter by the specific collection
)
)
)

return queryset.order_by("url")

def update_division(self, request, pk=None):
Expand Down Expand Up @@ -318,8 +397,8 @@ def get(self, request, *args, **kwargs):
def get_queryset(self):
queryset = (
CandidateURL.objects.filter(collection__config_folder=self.config_folder)
.with_exclusion_status()
.filter(excluded=False)
.with_exclusion_and_inclusion_status()
.filter(models.Q(excluded=False) | models.Q(included=True))
)
return queryset

Expand Down Expand Up @@ -554,3 +633,57 @@ def get(self, request, *args, **kwargs):
"resolved_title_errors": resolved_title_errors,
}
return render(request, "sde_collections/titles_and_errors_list.html", context)


class BaseAffectedURLsViewSet(CollectionFilterMixin, viewsets.ModelViewSet):
    """Base API viewset returning the candidate URLs matched by one pattern.

    The pattern is selected with the ``pattern_id`` query parameter and
    looked up on ``pattern_model``, which subclasses must set.
    """

    queryset = CandidateURL.objects.all()
    serializer_class = AffectedURLSerializer
    pattern_model = None
    pattern_type = None

    def get_queryset(self):
        """Load the requested pattern and return the URLs it matches.

        Stores the pattern on ``self.pattern``.

        Raises:
            Http404: if ``pattern_id`` is missing, is not a valid id, or
                does not refer to an existing pattern.
        """
        # Local import keeps this block independent of the module's import list.
        from django.http import Http404

        pattern_id = self.request.GET.get("pattern_id")
        try:
            self.pattern = self.pattern_model.objects.get(id=pattern_id)
        except (self.pattern_model.DoesNotExist, TypeError, ValueError) as exc:
            # A missing or unknown id raises DoesNotExist and a non-numeric
            # one raises ValueError; report both as 404 rather than a 500.
            raise Http404("No pattern matches the given pattern_id.") from exc
        return self.pattern.matched_urls()


class IncludePatternAffectedURLsViewSet(BaseAffectedURLsViewSet):
    """API endpoint for the URLs matched by an IncludePattern."""

    pattern_type = "Include"
    pattern_model = IncludePattern


class ExcludePatternAffectedURLsViewSet(BaseAffectedURLsViewSet):
    """API endpoint for the URLs matched by an ExcludePattern.

    Each URL is annotated with whether an IncludePattern also covers it and,
    if so, with that include pattern's ``match_pattern`` and id.
    """

    pattern_model = ExcludePattern
    pattern_type = "Exclude"

    def get_queryset(self):
        """Return the matched URLs annotated with include-pattern details."""
        # The base class reads pattern_id, loads self.pattern and returns
        # its matched URLs.
        queryset = super().get_queryset()

        # Include patterns attached to the outer CandidateURL row. The explicit
        # ordering makes both single-row subqueries below pick the same
        # pattern; without it each [:1] slice could return a different row.
        including_patterns = IncludePattern.objects.filter(candidate_urls=models.OuterRef("pk")).order_by("id")

        return queryset.annotate(
            included=models.Exists(including_patterns),
            included_by_pattern=models.Subquery(
                including_patterns.values("match_pattern")[:1], output_field=models.CharField()
            ),
            match_pattern_id=models.Subquery(including_patterns.values("id")[:1], output_field=models.IntegerField()),
        )


class TitlePatternAffectedURLsViewSet(BaseAffectedURLsViewSet):
    """API endpoint for the URLs matched by a TitlePattern."""

    pattern_type = "Title"
    pattern_model = TitlePattern


class DocumentTypePatternAffectedURLsViewSet(BaseAffectedURLsViewSet):
    """API endpoint for the URLs matched by a DocumentTypePattern."""

    pattern_type = "Document Type"
    pattern_model = DocumentTypePattern
Loading