Skip to content

Commit

Permalink
feat: investigate file authenticity (#7331)
Browse files Browse the repository at this point in the history
* feat: investigate file authenticity

* fix: use django-provided validation
  • Loading branch information
rjsparks committed Apr 23, 2024
1 parent 0616b07 commit 6b11291
Show file tree
Hide file tree
Showing 7 changed files with 325 additions and 4 deletions.
21 changes: 21 additions & 0 deletions ietf/doc/forms.py
Expand Up @@ -266,3 +266,24 @@ def clean(self):
@staticmethod
def valid_resource_tags():
    """Return the slug of every ExtResourceName object, ordered by slug."""
    names_by_slug = ExtResourceName.objects.all().order_by('slug')
    return names_by_slug.values_list('slug', flat=True)

class InvestigateForm(forms.Form):
    """Form that collects the file-name fragment to investigate."""

    # Manual inspection of the directories at the time of this writing shows
    # looking for files with less than 8 characters in the name is not useful.
    # Requiring this (via min_length) will help protect against the secretariat
    # unintentionally matching every draft.
    name_fragment = forms.CharField(
        label="File name or fragment to investigate",
        required=True,
        help_text=(
            "Enter a filename such as draft-ietf-some-draft-00.txt or a fragment like draft-ietf-some-draft using at least 8 characters. The search will also work for files that are not necessarily drafts."
        ),
        min_length=8,
    )

    def clean_name_fragment(self):
        """Validate the fragment's characters.

        Returns the fragment unchanged. Raises ValidationError when it
        contains any of the disallowed characters.
        """
        # The fragment ends up inside pathlib glob patterns (see
        # ietf.doc.utils.investigate_fragment), where "?", "[" and "]" are
        # wildcards just like "*". Without rejecting them, a fragment such as
        # "????????" satisfies min_length yet matches every file name of 8 or
        # more characters.
        disallowed_characters = ["%", "/", "\\", "*", "?", "[", "]"]
        name_fragment = self.cleaned_data["name_fragment"]
        if any(c in name_fragment for c in disallowed_characters):
            raise ValidationError(f"The following characters are disallowed: {', '.join(disallowed_characters)}")
        return name_fragment
30 changes: 30 additions & 0 deletions ietf/doc/templatetags/ietf_filters.py
Expand Up @@ -4,6 +4,7 @@

import datetime
import re
from pathlib import Path
from urllib.parse import urljoin
from zoneinfo import ZoneInfo

Expand Down Expand Up @@ -899,3 +900,32 @@ def simple_history_delta_change_cnt(history):
delta = history.diff_against(prev)
return len(delta.changes)
return 0

@register.filter
def mtime(path):
    """Return the file's modification time as a datetime in settings.TIME_ZONE.

    `path` is a pathlib Path object; its st_mtime is read via path.stat().
    """
    modified_timestamp = path.stat().st_mtime
    site_timezone = ZoneInfo(settings.TIME_ZONE)
    local_moment = datetime.datetime.fromtimestamp(modified_timestamp)
    return local_moment.astimezone(site_timezone)

@register.filter
def url_for_path(path):
    """Constructs a 'best' URL for web access to the given pathlib Path object.

    Assumes that the path is into the Internet-Draft archive or the proceedings.

    Returns:
      * a https://www.ietf.org/proceedings/ URL (with the path relative to
        settings.AGENDA_PATH appended) for anything beneath settings.AGENDA_PATH,
      * settings.IETF_ID_ARCHIVE_URL followed by the file name for anything
        beneath INTERNET_DRAFT_PATH, the parent of INTERNET_DRAFT_ARCHIVE_DIR,
        or INTERNET_ALL_DRAFTS_ARCHIVE_DIR,
      * "#" when the path is in none of those places.
    """
    # Test ancestry rather than using PurePath.match(): match() does not
    # support the recursive "**" wildcard (it behaves like a single "*"), so a
    # "<AGENDA_PATH>/**/*" pattern only recognises paths exactly two levels
    # below AGENDA_PATH and misses proceedings files at any other depth.
    if Path(settings.AGENDA_PATH) in path.parents:
        return (
            f"https://www.ietf.org/proceedings/{path.relative_to(settings.AGENDA_PATH)}"
        )

    draft_roots = (
        Path(settings.INTERNET_DRAFT_PATH),
        Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent,
        Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR),
    )
    if any(root in path.parents for root in draft_roots):
        return f"{settings.IETF_ID_ARCHIVE_URL}{path.name}"

    return "#"
136 changes: 135 additions & 1 deletion ietf/doc/tests.py
Expand Up @@ -45,7 +45,7 @@
StatusChangeFactory, DocExtResourceFactory, RgDraftFactory, BcpFactory)
from ietf.doc.forms import NotifyForm
from ietf.doc.fields import SearchableDocumentsField
from ietf.doc.utils import create_ballot_if_not_open, uppercase_std_abbreviated_name, DraftAliasGenerator
from ietf.doc.utils import create_ballot_if_not_open, investigate_fragment, uppercase_std_abbreviated_name, DraftAliasGenerator
from ietf.group.models import Group, Role
from ietf.group.factories import GroupFactory, RoleFactory
from ietf.ipr.factories import HolderIprDisclosureFactory
Expand Down Expand Up @@ -3141,3 +3141,137 @@ def test_state_index(self):
if not '-' in name:
self.assertIn(name, content)

class InvestigateTests(TestCase):
    """Exercises investigate_fragment() and the investigate view against a
    small tree of empty files created in setUp()."""

    # NOTE(review): the commented-out entries are presumably already provided
    # by TestCase.settings_temp_path_overrides - confirm before removing them.
    settings_temp_path_overrides = TestCase.settings_temp_path_overrides + [
        "AGENDA_PATH",
        # "INTERNET_DRAFT_PATH",
        # "INTERNET_DRAFT_ARCHIVE_DIR",
        # "INTERNET_ALL_DRAFTS_ARCHIVE_DIR",
    ]

    def setUp(self):
        super().setUp()
        # Contort the draft archive dir temporary replacement
        # to match the "collections" concept: the archive becomes one
        # sub-directory next to a second, "donated" collection.
        collections_root = Path(settings.INTERNET_DRAFT_ARCHIVE_DIR)
        draft_archive = collections_root / "draft-archive"
        draft_archive.mkdir()
        settings.INTERNET_DRAFT_ARCHIVE_DIR = str(draft_archive)
        personal_copy = collections_root / "donated-personal-copy"
        personal_copy.mkdir()
        meeting_dir = Path(settings.AGENDA_PATH) / "666"
        meeting_dir.mkdir()
        all_archive = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
        repository = Path(settings.INTERNET_DRAFT_PATH)

        # (directories that receive the files, names of the empty files to create)
        layout = [
            ((repository, all_archive), ("draft-this-is-active-00.txt",)),
            (
                (draft_archive, all_archive),
                (
                    "draft-old-but-can-authenticate-00.txt",
                    "draft-has-mixed-provenance-01.txt",
                ),
            ),
            (
                (personal_copy, all_archive),
                (
                    "draft-donated-from-a-personal-collection-00.txt",
                    "draft-has-mixed-provenance-00.txt",
                    "draft-has-mixed-provenance-00.txt.Z",
                ),
            ),
            ((all_archive,), ("draft-this-should-not-be-possible-00.txt",)),
            ((meeting_dir,), ("draft-this-predates-the-archive-00.txt",)),
        ]
        for directories, filenames in layout:
            for directory in directories:
                for filename in filenames:
                    (directory / filename).touch()

    def _assert_fragment_result(
        self, fragment, can_verify=(), unverifiable_collections=(), unexpected=()
    ):
        """Run investigate_fragment(fragment) and check the file names found
        under each of the three result keys."""
        result = investigate_fragment(fragment)
        expected_by_key = {
            "can_verify": can_verify,
            "unverifiable_collections": unverifiable_collections,
            "unexpected": unexpected,
        }
        for key, names in expected_by_key.items():
            self.assertEqual(len(result[key]), len(names))
        for key, names in expected_by_key.items():
            self.assertEqual(sorted(p.name for p in result[key]), sorted(names))

    def test_investigate_fragment(self):
        self._assert_fragment_result(
            "this-is-active", can_verify=["draft-this-is-active-00.txt"]
        )
        self._assert_fragment_result(
            "old-but-can", can_verify=["draft-old-but-can-authenticate-00.txt"]
        )
        self._assert_fragment_result(
            "predates", can_verify=["draft-this-predates-the-archive-00.txt"]
        )
        self._assert_fragment_result(
            "personal-collection",
            unverifiable_collections=[
                "draft-donated-from-a-personal-collection-00.txt"
            ],
        )
        self._assert_fragment_result(
            "mixed-provenance",
            can_verify=["draft-has-mixed-provenance-01.txt"],
            unverifiable_collections=[
                "draft-has-mixed-provenance-00.txt",
                "draft-has-mixed-provenance-00.txt.Z",
            ],
        )
        self._assert_fragment_result(
            "not-be-possible",
            unexpected=["draft-this-should-not-be-possible-00.txt"],
        )

    def test_investigate(self):
        url = urlreverse("ietf.doc.views_doc.investigate")
        login_testing_unauthorized(self, "secretary", url)

        # A plain GET shows the form and no results section.
        r = self.client.get(url)
        self.assertEqual(r.status_code, 200)
        q = PyQuery(r.content)
        self.assertEqual(len(q("form#investigate")), 1)
        self.assertEqual(len(q("div#results")), 0)

        # Valid fragments: the results section is always rendered; the
        # individual tables only appear when they have rows.
        table_selectors = ("table#authenticated", "table#unverifiable", "table#unexpected")
        expected_table_counts = [
            ("this-is-not-found", (0, 0, 0)),
            ("mixed-provenance", (1, 1, 0)),
            ("not-be-possible", (0, 0, 1)),
        ]
        for fragment, counts in expected_table_counts:
            r = self.client.post(url, dict(name_fragment=fragment))
            self.assertEqual(r.status_code, 200)
            q = PyQuery(r.content)
            self.assertEqual(len(q("div#results")), 1)
            for selector, count in zip(table_selectors, counts):
                self.assertEqual(len(q(selector)), count)

        # Invalid fragments (too short, or containing a disallowed character)
        # re-render the form with the field flagged as invalid.
        invalid_fragments = ["short"]
        invalid_fragments += [f"bad{char}character" for char in ["*", "%", "/", "\\"]]
        for fragment in invalid_fragments:
            r = self.client.post(url, dict(name_fragment=fragment))
            self.assertEqual(r.status_code, 200)
            q = PyQuery(r.content)
            self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
5 changes: 4 additions & 1 deletion ietf/doc/urls.py
Expand Up @@ -66,6 +66,8 @@
r"^shepherdwriteup-template/(?P<type>\w+)/?$",
views_doc.document_shepherd_writeup_template,
),
url(r'^investigate/?$', views_doc.investigate),


url(r'^stats/newrevisiondocevent/?$', views_stats.chart_newrevisiondocevent),
url(r'^stats/newrevisiondocevent/conf/?$', views_stats.chart_conf_newrevisiondocevent),
Expand Down Expand Up @@ -179,7 +181,8 @@
url(r'^%(name)s/session/' % settings.URL_REGEXPS, include('ietf.doc.urls_material')),
url(r'^(?P<name>[A-Za-z0-9._+-]+)/session/', include(session_patterns)),
url(r'^(?P<name>[A-Za-z0-9\._\+\-]+)$', views_search.search_for_name),
# latest versions - keep old URLs alive during migration period
# rfcdiff - latest versions - keep old URLs alive during migration period
url(r'^rfcdiff-latest-json/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)),
url(r'^rfcdiff-latest-json/(?P<name>[Rr][Ff][Cc] [0-9]+?)(\.txt|\.html)?/?$', RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)),
# end of rfcdiff support URLs
]
27 changes: 27 additions & 0 deletions ietf/doc/utils.py
Expand Up @@ -13,6 +13,7 @@

from collections import defaultdict, namedtuple, Counter
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Union
from zoneinfo import ZoneInfo

Expand Down Expand Up @@ -1382,3 +1383,29 @@ def __iter__(self) -> Iterator[tuple[str, list[str]]]:
# .all = everything from above
if all:
yield alias + ".all", list(all)

def investigate_fragment(name_fragment):
    """Find files whose names contain name_fragment and sort them by provenance.

    The fragment is matched as literal text; glob wildcard characters in it
    are escaped before the search.

    Returns a dict with three keys:
      can_verify: set of Paths found directly in settings.INTERNET_DRAFT_PATH
        or settings.INTERNET_DRAFT_ARCHIVE_DIR, or at any depth beneath
        settings.AGENDA_PATH.
      unverifiable_collections: set of Paths found at any depth beneath the
        parent of settings.INTERNET_DRAFT_ARCHIVE_DIR that are not already in
        can_verify.
      unexpected: list of Paths found directly in
        settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR whose file name does not
        appear in either of the two sets above.
    """
    from glob import escape as glob_escape

    # Without escaping, "?", "[" and "*" in the fragment would be interpreted
    # as wildcards by Path.glob() and could match far more than intended.
    literal_fragment = glob_escape(name_fragment)
    flat_pattern = f"*{literal_fragment}*"
    recursive_pattern = f"**/*{literal_fragment}*"

    can_verify = set()
    for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]:
        can_verify.update(Path(root).glob(flat_pattern))

    can_verify.update(Path(settings.AGENDA_PATH).glob(recursive_pattern))

    # N.B. This reflects the assumption that the internet draft archive dir is in
    # a directory with other collections (at /a/ietfdata/draft/collections as this is written)
    unverifiable_collections = set(
        Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(recursive_pattern)
    )
    # The recursive search above also finds the archive's own files; drop them.
    unverifiable_collections.difference_update(can_verify)

    # Anything in the all-drafts archive should also have been found, by name,
    # in one of the locations searched above.
    expected_names = {p.name for p in can_verify | unverifiable_collections}
    maybe_unexpected = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(flat_pattern)
    unexpected = [p for p in maybe_unexpected if p.name not in expected_names]

    return dict(
        can_verify=can_verify,
        unverifiable_collections=unverifiable_collections,
        unexpected=unexpected,
    )
17 changes: 15 additions & 2 deletions ietf/doc/views_doc.py
Expand Up @@ -58,7 +58,7 @@
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
RelatedDocument, RelatedDocHistory)
from ietf.doc.utils import (augment_events_with_revision,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
add_events_message_info, get_unicode_document_content,
Expand All @@ -72,7 +72,7 @@
role_required, is_individual_draft_author, can_request_rfc_publication)
from ietf.name.models import StreamName, BallotPositionName
from ietf.utils.history import find_history_active_at
from ietf.doc.forms import TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm
from ietf.doc.forms import InvestigateForm, TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm
from ietf.doc.mails import email_comment, email_remind_action_holders
from ietf.mailtrigger.utils import gather_relevant_expansions
from ietf.meeting.models import Session, SessionPresentation
Expand Down Expand Up @@ -2254,3 +2254,16 @@ def idnits2_state(request, name, rev=None):
content_type="text/plain;charset=utf-8",
)

@role_required("Secretariat")
def investigate(request):
    """Show the investigate form and, after a valid POST, the search results.

    `results` stays None for GET requests and for invalid submissions;
    otherwise it is whatever investigate_fragment() returns for the
    submitted name fragment.
    """
    results = None
    if request.method != "POST":
        form = InvestigateForm()
    else:
        form = InvestigateForm(request.POST)
        if form.is_valid():
            results = investigate_fragment(form.cleaned_data["name_fragment"])
    template_context = dict(form=form, results=results)
    return render(request, "doc/investigate.html", context=template_context)
93 changes: 93 additions & 0 deletions ietf/templates/doc/investigate.html
@@ -0,0 +1,93 @@
{% extends "base.html" %}
{# Copyright The IETF Trust 2024, All Rights Reserved #}
{# Renders the investigate form and, when results are present, up to three tables of matching files. #}
{# The mtime and url_for_path filters used below come from ietf_filters. #}
{% load django_bootstrap5 ietf_filters origin static %}
{% block title %}Investigate{% endblock %}
{% block pagehead %}
<link rel="stylesheet" href="{% static "ietf/css/list.css" %}">
{% endblock %}
{% block content %}
{% origin %}
<h1>Investigate</h1>
<form id="investigate" method="post">
{% csrf_token %}
{% bootstrap_form form %}
<button class="btn btn-primary" type="submit">Investigate</button>
</form>
{# results is None until a valid form has been submitted; once set it is a dict, so this section renders even when nothing matched. #}
{% if results %}
<div id="results">
{# Files found in locations whose contents can be authenticated. #}
{% if results.can_verify %}
<h2>These can be authenticated</h2>
<table id="authenticated" class="table table-sm table-striped tablesorter">
<thead>
<tr>
<th scope="col" data-sort="name">Name</th>
<th scope="col" data-sort="modified">Last Modified On</th>
<th scope="col" data-sort="link">Link</th>
</tr>
</thead>
<tbody>
{% for path in results.can_verify %}
{% with url=path|url_for_path %}
<tr><td>{{path.name}}</td><td>{{path|mtime|date:"DATETIME_FORMAT"}}</td><td><a href="{{url}}">{{url}}</a></td></tr>
{% endwith %}
{% endfor %}
</tbody>
</table>
{% else %}
<h2>Nothing with this name fragment can be authenticated</h2>
{% endif %}
<hr>
{# Files found in other collections; the extra Source column shows the full path of each match. #}
{% if results.unverifiable_collections %}
<h2>These are in the archive, but cannot be authenticated</h2>
<table id="unverifiable" class="table table-sm table-striped tablesorter">
<thead>
<tr>
<th scope="col" data-sort="name">Name</th>
<th scope="col" data-sort="modified">Last Modified On</th>
<th scope="col" data-sort="link">Link</th>
<th scope="col" data-sort="source">Source</th>
</tr>
</thead>
<tbody>
{% for path in results.unverifiable_collections %}
{% with url=path|url_for_path %}
<tr>
<td>{{path.name}}</td>
<td>{{path|mtime|date:"DATETIME_FORMAT"}}</td>
<td><a href="{{url}}">{{url}}</a></td>
<td>{{path}}</td>
</tr>
{% endwith %}
{% endfor %}
</tbody>
</table>
{% endif %}
{# Files whose names were not found in any of the locations covered by the two tables above. #}
{% if results.unexpected %}
<h2>These are unexpected and we do not know what their origin is. These cannot be authenticated</h2>
<table id="unexpected" class="table table-sm table-striped tablesorter">
<thead>
<tr>
<th scope="col" data-sort="name">Name</th>
<th scope="col" data-sort="modified">Last Modified On</th>
<th scope="col" data-sort="link">Link</th>
</tr>
</thead>
<tbody>
{% for path in results.unexpected %}
{% with url=path|url_for_path %}
<tr>
<td>{{path.name}}</td>
<td>{{path|mtime|date:"DATETIME_FORMAT"}}</td>
<td><a href="{{url}}">{{url}}</a></td>
</tr>
{% endwith %}
{% endfor %}
</tbody>
</table>
{% endif %}
</div>
{% endif %}
{% endblock %}
{% block js %}
<script src="{% static "ietf/js/list.js" %}"></script>
{% endblock %}

0 comments on commit 6b11291

Please sign in to comment.