Skip to content

Commit 6b11291

Browse files
authored
feat: investigate file authenticity (#7331)
* feat: investigate file authenticity * fix: use django-provided validation
1 parent 0616b07 commit 6b11291

File tree

7 files changed

+325
-4
lines changed

7 files changed

+325
-4
lines changed

ietf/doc/forms.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,3 +266,24 @@ def clean(self):
266266
@staticmethod
267267
def valid_resource_tags():
268268
return ExtResourceName.objects.all().order_by('slug').values_list('slug', flat=True)
269+
270+
class InvestigateForm(forms.Form):
    """Form that collects a file name (or fragment of one) to investigate."""

    # Manual inspection of the directories at the time of this writing shows
    # that looking for files with fewer than 8 characters in the name is not
    # useful. Requiring min_length=8 helps protect against the secretariat
    # unintentionally matching every draft.
    name_fragment = forms.CharField(
        required=True,
        min_length=8,
        label="File name or fragment to investigate",
        help_text=(
            "Enter a filename such as draft-ietf-some-draft-00.txt or a fragment like draft-ietf-some-draft using at least 8 characters. The search will also work for files that are not necessarily drafts."
        ),
    )

    def clean_name_fragment(self):
        """Reject fragments that contain any disallowed character.

        Returns the fragment unchanged when it is acceptable; raises
        ValidationError (listing every disallowed character) otherwise.
        """
        fragment = self.cleaned_data["name_fragment"]
        forbidden = ["%", "/", "\\", "*"]
        for char in forbidden:
            if char in fragment:
                raise ValidationError(
                    f"The following characters are disallowed: {', '.join(forbidden)}"
                )
        return fragment

ietf/doc/templatetags/ietf_filters.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import datetime
66
import re
7+
from pathlib import Path
78
from urllib.parse import urljoin
89
from zoneinfo import ZoneInfo
910

@@ -899,3 +900,32 @@ def simple_history_delta_change_cnt(history):
899900
delta = history.diff_against(prev)
900901
return len(delta.changes)
901902
return 0
903+
904+
@register.filter
def mtime(path):
    """Return the modification time of a pathlib Path as an aware datetime.

    The file's st_mtime is read via path.stat() and the resulting datetime
    is expressed in the time zone named by settings.TIME_ZONE.
    """
    modified_timestamp = path.stat().st_mtime
    site_zone = ZoneInfo(settings.TIME_ZONE)
    local_moment = datetime.datetime.fromtimestamp(modified_timestamp)
    return local_moment.astimezone(site_zone)
908+
909+
@register.filter
def url_for_path(path):
    """Construct a 'best' URL for web access to the given pathlib Path object.

    Assumes that the path is into the Internet-Draft archive or the proceedings.

    Returns a proceedings URL (preserving the path relative to
    settings.AGENDA_PATH) for anything below the agenda directory, an
    Internet-Draft archive URL (built from the file name only) for anything
    below one of the draft directories, and "#" when neither applies.
    """
    agenda_root = Path(settings.AGENDA_PATH)
    # Test ancestry rather than using path.match() with "**": PurePath.match()
    # does not support the recursive wildcard (it behaves like a single "*"),
    # so a pattern such as "<root>/**/*" only recognises files exactly one
    # directory below the root and misses everything nested deeper.
    if agenda_root in path.parents:
        return f"https://www.ietf.org/proceedings/{path.relative_to(agenda_root)}"

    draft_roots = (
        Path(settings.INTERNET_DRAFT_PATH),
        # The parent of the archive dir is used so that sibling directories
        # of the archive are covered as well.
        Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent,
        Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR),
    )
    if any(root in path.parents for root in draft_roots):
        return f"{settings.IETF_ID_ARCHIVE_URL}{path.name}"

    return "#"

ietf/doc/tests.py

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
StatusChangeFactory, DocExtResourceFactory, RgDraftFactory, BcpFactory)
4646
from ietf.doc.forms import NotifyForm
4747
from ietf.doc.fields import SearchableDocumentsField
48-
from ietf.doc.utils import create_ballot_if_not_open, uppercase_std_abbreviated_name, DraftAliasGenerator
48+
from ietf.doc.utils import create_ballot_if_not_open, investigate_fragment, uppercase_std_abbreviated_name, DraftAliasGenerator
4949
from ietf.group.models import Group, Role
5050
from ietf.group.factories import GroupFactory, RoleFactory
5151
from ietf.ipr.factories import HolderIprDisclosureFactory
@@ -3141,3 +3141,137 @@ def test_state_index(self):
31413141
if not '-' in name:
31423142
self.assertIn(name, content)
31433143

3144+
class InvestigateTests(TestCase):
    """Exercise investigate_fragment() and the investigate view against a
    small, temporary layout of draft, archive, collection and meeting files."""

    settings_temp_path_overrides = TestCase.settings_temp_path_overrides + [
        "AGENDA_PATH",
        # "INTERNET_DRAFT_PATH",
        # "INTERNET_DRAFT_ARCHIVE_DIR",
        # "INTERNET_ALL_DRAFTS_ARCHIVE_DIR",
    ]

    def setUp(self):
        super().setUp()
        # Contort the draft archive dir temporary replacement
        # to match the "collections" concept
        collections_root = Path(settings.INTERNET_DRAFT_ARCHIVE_DIR)
        archive_dir = collections_root / "draft-archive"
        archive_dir.mkdir()
        settings.INTERNET_DRAFT_ARCHIVE_DIR = str(archive_dir)
        donated_dir = collections_root / "donated-personal-copy"
        donated_dir.mkdir()
        meeting_dir = Path(settings.AGENDA_PATH) / "666"
        meeting_dir.mkdir()
        all_archive_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
        repository_dir = Path(settings.INTERNET_DRAFT_PATH)

        # Every file listed here is created both in its own directory and in
        # the all-drafts archive.
        mirrored_files = [
            (repository_dir, ["draft-this-is-active-00.txt"]),
            (
                archive_dir,
                [
                    "draft-old-but-can-authenticate-00.txt",
                    "draft-has-mixed-provenance-01.txt",
                ],
            ),
            (
                donated_dir,
                [
                    "draft-donated-from-a-personal-collection-00.txt",
                    "draft-has-mixed-provenance-00.txt",
                    "draft-has-mixed-provenance-00.txt.Z",
                ],
            ),
        ]
        for directory, filenames in mirrored_files:
            for filename in filenames:
                (directory / filename).touch()
                (all_archive_dir / filename).touch()

        # Present only in the all-drafts archive.
        (all_archive_dir / "draft-this-should-not-be-possible-00.txt").touch()
        # Present only under the meeting directory.
        (meeting_dir / "draft-this-predates-the-archive-00.txt").touch()

    def test_investigate_fragment(self):
        # Each case: fragment, then the file names expected under the
        # "can_verify", "unverifiable_collections" and "unexpected" keys.
        cases = [
            ("this-is-active", ["draft-this-is-active-00.txt"], [], []),
            ("old-but-can", ["draft-old-but-can-authenticate-00.txt"], [], []),
            ("predates", ["draft-this-predates-the-archive-00.txt"], [], []),
            (
                "personal-collection",
                [],
                ["draft-donated-from-a-personal-collection-00.txt"],
                [],
            ),
            (
                "mixed-provenance",
                ["draft-has-mixed-provenance-01.txt"],
                [
                    "draft-has-mixed-provenance-00.txt",
                    "draft-has-mixed-provenance-00.txt.Z",
                ],
                [],
            ),
            (
                "not-be-possible",
                [],
                [],
                ["draft-this-should-not-be-possible-00.txt"],
            ),
        ]
        result_keys = ["can_verify", "unverifiable_collections", "unexpected"]
        for fragment, *expected_by_key in cases:
            result = investigate_fragment(fragment)
            for key, expected_names in zip(result_keys, expected_by_key):
                # Comparing sorted name lists checks both count and content.
                self.assertEqual(
                    sorted(p.name for p in result[key]),
                    sorted(expected_names),
                )

    def test_investigate(self):
        url = urlreverse("ietf.doc.views_doc.investigate")
        login_testing_unauthorized(self, "secretary", url)

        # A plain GET shows the form and no results section.
        r = self.client.get(url)
        self.assertEqual(r.status_code, 200)
        q = PyQuery(r.content)
        self.assertEqual(len(q("form#investigate")), 1)
        self.assertEqual(len(q("div#results")), 0)

        # Valid fragments: expected number of each results table.
        table_expectations = [
            ("this-is-not-found", 0, 0, 0),
            ("mixed-provenance", 1, 1, 0),
            ("not-be-possible", 0, 0, 1),
        ]
        for fragment, authenticated, unverifiable, unexpected in table_expectations:
            r = self.client.post(url, dict(name_fragment=fragment))
            self.assertEqual(r.status_code, 200)
            q = PyQuery(r.content)
            self.assertEqual(len(q("div#results")), 1)
            self.assertEqual(len(q("table#authenticated")), authenticated)
            self.assertEqual(len(q("table#unverifiable")), unverifiable)
            self.assertEqual(len(q("table#unexpected")), unexpected)

        # Invalid fragments: too short, or containing a disallowed character.
        invalid_fragments = ["short"] + [
            f"bad{char}character" for char in ["*", "%", "/", "\\"]
        ]
        for fragment in invalid_fragments:
            r = self.client.post(url, dict(name_fragment=fragment))
            self.assertEqual(r.status_code, 200)
            q = PyQuery(r.content)
            self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)

ietf/doc/urls.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
r"^shepherdwriteup-template/(?P<type>\w+)/?$",
6767
views_doc.document_shepherd_writeup_template,
6868
),
69+
url(r'^investigate/?$', views_doc.investigate),
70+
6971

7072
url(r'^stats/newrevisiondocevent/?$', views_stats.chart_newrevisiondocevent),
7173
url(r'^stats/newrevisiondocevent/conf/?$', views_stats.chart_conf_newrevisiondocevent),
@@ -179,7 +181,8 @@
179181
url(r'^%(name)s/session/' % settings.URL_REGEXPS, include('ietf.doc.urls_material')),
180182
url(r'^(?P<name>[A-Za-z0-9._+-]+)/session/', include(session_patterns)),
181183
url(r'^(?P<name>[A-Za-z0-9\._\+\-]+)$', views_search.search_for_name),
182-
# latest versions - keep old URLs alive during migration period
184+
# rfcdiff - latest versions - keep old URLs alive during migration period
183185
url(r'^rfcdiff-latest-json/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)),
184186
url(r'^rfcdiff-latest-json/(?P<name>[Rr][Ff][Cc] [0-9]+?)(\.txt|\.html)?/?$', RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)),
187+
# end of rfcdiff support URLs
185188
]

ietf/doc/utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from collections import defaultdict, namedtuple, Counter
1515
from dataclasses import dataclass
16+
from pathlib import Path
1617
from typing import Iterator, Union
1718
from zoneinfo import ZoneInfo
1819

@@ -1382,3 +1383,29 @@ def __iter__(self) -> Iterator[tuple[str, list[str]]]:
13821383
# .all = everything from above
13831384
if all:
13841385
yield alias + ".all", list(all)
1386+
1387+
def investigate_fragment(name_fragment):
    """Find files whose names contain name_fragment and sort them by provenance.

    The fragment is matched literally: glob metacharacters in it are escaped
    so that input such as "????????" cannot act as a wildcard and match
    (nearly) every file.

    Returns a dict with three entries:
      can_verify: set of Paths found directly in settings.INTERNET_DRAFT_PATH
        or settings.INTERNET_DRAFT_ARCHIVE_DIR, or anywhere below
        settings.AGENDA_PATH.
      unverifiable_collections: set of Paths found anywhere below the parent
        of settings.INTERNET_DRAFT_ARCHIVE_DIR that are not in can_verify.
      unexpected: list of Paths found directly in
        settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR whose file name appears in
        neither of the other two sets.
    """
    import glob  # local import: only needed here, to escape the fragment

    literal_fragment = glob.escape(name_fragment)
    flat_pattern = f"*{literal_fragment}*"
    recursive_pattern = f"**/*{literal_fragment}*"

    can_verify = set()
    for root in (settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR):
        can_verify.update(Path(root).glob(flat_pattern))
    can_verify.update(Path(settings.AGENDA_PATH).glob(recursive_pattern))

    # N.B. This reflects the assumption that the internet draft archive dir is in
    # a directory with other collections (at /a/ietfdata/draft/collections as this is written)
    unverifiable_collections = set(
        Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(recursive_pattern)
    )
    # The recursive search also finds the archive dir's own files; those are
    # already accounted for in can_verify.
    unverifiable_collections.difference_update(can_verify)

    expected_names = {p.name for p in can_verify | unverifiable_collections}
    unexpected = [
        p
        for p in Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(flat_pattern)
        if p.name not in expected_names
    ]

    return dict(
        can_verify=can_verify,
        unverifiable_collections=unverifiable_collections,
        unexpected=unexpected,
    )

ietf/doc/views_doc.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
5959
RelatedDocument, RelatedDocHistory)
6060
from ietf.doc.utils import (augment_events_with_revision,
61-
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id,
61+
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
6262
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
6363
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
6464
add_events_message_info, get_unicode_document_content,
@@ -72,7 +72,7 @@
7272
role_required, is_individual_draft_author, can_request_rfc_publication)
7373
from ietf.name.models import StreamName, BallotPositionName
7474
from ietf.utils.history import find_history_active_at
75-
from ietf.doc.forms import TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm
75+
from ietf.doc.forms import InvestigateForm, TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm
7676
from ietf.doc.mails import email_comment, email_remind_action_holders
7777
from ietf.mailtrigger.utils import gather_relevant_expansions
7878
from ietf.meeting.models import Session, SessionPresentation
@@ -2254,3 +2254,16 @@ def idnits2_state(request, name, rev=None):
22542254
content_type="text/plain;charset=utf-8",
22552255
)
22562256

2257+
@role_required("Secretariat")
def investigate(request):
    """Render the investigate page.

    On GET an unbound form is shown with no results. On POST the submitted
    form is validated; when valid, the results of investigate_fragment() for
    the submitted name fragment are passed to the template, otherwise the
    bound form (with its errors) is rendered and results stay None.
    """
    results = None
    if request.method != "POST":
        form = InvestigateForm()
    else:
        form = InvestigateForm(request.POST)
        if form.is_valid():
            results = investigate_fragment(form.cleaned_data["name_fragment"])
    context = dict(form=form, results=results)
    return render(request, "doc/investigate.html", context=context)

ietf/templates/doc/investigate.html

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{% extends "base.html" %}
2+
{# Copyright The IETF Trust 2024, All Rights Reserved #}
3+
{% load django_bootstrap5 ietf_filters origin static %}
4+
{% block title %}Investigate{% endblock %}
5+
{% block pagehead %}
6+
<link rel="stylesheet" href="{% static "ietf/css/list.css" %}">
7+
{% endblock %}
8+
{% block content %}
9+
{% origin %}
10+
<h1>Investigate</h1>
11+
<form id="investigate" method="post">
12+
{% csrf_token %}
13+
{% bootstrap_form form %}
14+
<button class="btn btn-primary" type="submit">Investigate</button>
15+
</form>
16+
{% if results %}
17+
<div id="results">
18+
{% if results.can_verify %}
19+
<h2>These can be authenticated</h2>
20+
<table id="authenticated" class="table table-sm table-striped tablesorter">
21+
<thead>
22+
<tr>
23+
<th scope="col" data-sort="name">Name</th>
24+
<th scope="col" data-sort="modified">Last Modified On</th>
25+
<th scope="col" data-sort="link">Link</th>
26+
</tr>
27+
</thead>
28+
<tbody>
29+
{% for path in results.can_verify %}
30+
{% with url=path|url_for_path %}
31+
<tr><td>{{path.name}}</td><td>{{path|mtime|date:"DATETIME_FORMAT"}}</td><td><a href="{{url}}">{{url}}</a></td></tr>
32+
{% endwith %}
33+
{% endfor %}
34+
</tbody>
35+
</table>
36+
{% else %}
37+
<h2>Nothing with this name fragment can be authenticated</h2>
38+
{% endif %}
39+
<hr>
40+
{% if results.unverifiable_collections %}
41+
<h2>These are in the archive, but cannot be authenticated</h2>
42+
<table id="unverifiable" class="table table-sm table-striped tablesorter">
43+
<thead>
44+
<tr>
45+
<th scope="col" data-sort="name">Name</th>
46+
<th scope="col" data-sort="modified">Last Modified On</th>
47+
<th scope="col" data-sort="link">Link</th>
48+
<th scope="col" data-sort="source">Source</th>
49+
</tr>
50+
</thead>
51+
<tbody>
52+
{% for path in results.unverifiable_collections %}
53+
{% with url=path|url_for_path %}
54+
<tr>
55+
<td>{{path.name}}</td>
56+
<td>{{path|mtime|date:"DATETIME_FORMAT"}}</td>
57+
<td><a href="{{url}}">{{url}}</a></td>
58+
<td>{{path}}</td>
59+
</tr>
60+
{% endwith %}
61+
{% endfor %}
62+
</tbody>
63+
</table>
64+
{% endif %}
65+
{% if results.unexpected %}
66+
<h2>These are unexpected and we do not know what their origin is. These cannot be authenticated</h2>
67+
<table id="unexpected" class="table table-sm table-striped tablesorter">
68+
<thead>
69+
<tr>
70+
<th scope="col" data-sort="name">Name</th>
71+
<th scope="col" data-sort="modified">Last Modified On</th>
72+
<th scope="col" data-sort="link">Link</th>
73+
</tr>
74+
</thead>
75+
<tbody>
76+
{% for path in results.unexpected %}
77+
{% with url=path|url_for_path %}
78+
<tr>
79+
<td>{{path.name}}</td>
80+
<td>{{path|mtime|date:"DATETIME_FORMAT"}}</td>
81+
<td><a href="{{url}}">{{url}}</a></td>
82+
</tr>
83+
{% endwith %}
84+
{% endfor %}
85+
</tbody>
86+
</table>
87+
{% endif %}
88+
</div>
89+
{% endif %}
90+
{% endblock %}
91+
{% block js %}
92+
<script src="{% static "ietf/js/list.js" %}"></script>
93+
{% endblock %}

0 commit comments

Comments
 (0)