# tests.py (repository forked from freelawproject/courts-db)
import json
import os
import re
import sys
import unittest
from collections import Counter
from io import open
from json.decoder import JSONDecodeError
from pathlib import Path
from unittest import TestCase
from courts_db import find_court, find_court_by_id
from courts_db.text_utils import strip_punc
from courts_db.utils import db_root, load_courts_db
class CourtsDBTestCase(TestCase):
    """Base test case that makes the courts database available to each test."""

    def setUp(self):
        """Store the result of ``load_courts_db()`` on ``self.courts``."""
        loaded_courts = load_courts_db()
        self.courts = loaded_courts
class DataTest(CourtsDBTestCase):
    """Data tests are used to confirm our data set is functional."""

    def test_unicode_handling(self):
        """Do we handle regex matching with accents or other non-ascii?"""
        sample_text = "Tribunal Dé Apelaciones De Puerto Rico"
        matches = find_court(court_str=sample_text)
        expected_matches = ["prapp"]
        self.assertEqual(matches, expected_matches)

    def test_parent_courts(self):
        """Can we find the parent court?"""
        court_str_example = (
            "California Court of Appeal, First Appellate District"
        )
        matches = find_court(court_str=court_str_example)
        self.assertEqual(
            find_court_by_id(matches[0])[0].get("parent"), "calctapp"
        )

        # The Supreme Court record is expected to carry no parent at all.
        court_str_example = "Supreme Court of the United States"
        matches = find_court(court_str=court_str_example)
        self.assertIsNone(find_court_by_id(matches[0])[0].get("parent"))

    def test_all_example(self):
        """Can we extract the correct court id from each example string?"""
        for court in self.courts:
            for court_str_example in court["examples"]:
                print(f"Testing {court_str_example}", end=" ... ")
                matches = find_court(court_str=court_str_example)
                self.assertIn(
                    court["id"],
                    matches,
                    f"Failure to find {court['id']} in {court_str_example}",
                )
                print("√")

    def test_location_filter(self):
        """Can we use location to filter properly?"""
        calhoun = "Calhoun County Circuit Court"
        fayette = "Fayette County Court of Common Pleas"

        # Without a location both same-named courts are expected back; the
        # result order is not relied upon, hence the sort.
        self.assertEqual(
            sorted(find_court(calhoun)),
            ["flacirct14cal", "micirct37cal"],
            msg="Court filtering failed",
        )
        self.assertEqual(
            sorted(find_court(fayette)),
            ["ohctcomplfayett", "pactcomplfayett"],
            msg="Courts not found",
        )

        # (court string, location, expected ids, failure message)
        filtered_cases = [
            (
                calhoun,
                "Florida",
                ["flacirct14cal"],
                "Florida county court not found",
            ),
            (
                calhoun,
                "Michigan",
                ["micirct37cal"],
                "Michigan county court not found",
            ),
            (
                fayette,
                "jibberish",
                [],
                "Unknown location should match no courts",
            ),
            (
                fayette,
                "Ohio",
                ["ohctcomplfayett"],
                "Ohio county court not found",
            ),
            (
                fayette,
                "Pennsylvania",
                ["pactcomplfayett"],
                "Pennsylvania county court not found",
            ),
        ]
        for court_str, location, expected_ids, failure_msg in filtered_cases:
            self.assertEqual(
                find_court(court_str, location=location),
                expected_ids,
                msg=failure_msg,
            )
class ExamplesTest(CourtsDBTestCase):
    """Run every court's example strings through ``find_court``."""

    def _check_examples(self, bankruptcy):
        """Assert each example resolves to its own court id.

        Only courts whose ``type`` is (or is not) ``"bankruptcy"``, according
        to the ``bankruptcy`` flag, are checked; the same flag is forwarded to
        ``find_court``.
        """
        for record in self.courts:
            is_bankruptcy_court = record["type"] == "bankruptcy"
            if is_bankruptcy_court != bankruptcy:
                continue
            for raw_example in record["examples"]:
                cleaned = strip_punc(raw_example)
                found = find_court(court_str=cleaned, bankruptcy=bankruptcy)
                unique_ids = list(set(found))
                self.assertIn(
                    record["id"], unique_ids, msg=f"Failed {cleaned}"
                )

    def test_all_non_bankruptcy_examples(self):
        """Examples of non-bankruptcy courts must match with bankruptcy off."""
        self._check_examples(False)

    def test_bankruptcy_examples(self):
        """Examples of bankruptcy courts must match with bankruptcy on."""
        self._check_examples(True)
class JsonTest(CourtsDBTestCase):
    """Checks on the raw ``courts.json`` file and on the loaded court records."""

    def setUp(self) -> None:
        """Prepare the regexes used to pick apart a broken ``courts.json``.

        This overrides the base ``setUp`` without calling it, so
        ``self.courts`` is not populated; the tests below call
        ``load_courts_db()`` themselves where they need the records.
        NOTE(review): presumably this keeps ``test_json`` able to report on a
        file that does not parse -- confirm before adding ``super().setUp()``.
        """
        self.name_regex = r'"name": "(?P<name>.*)",'
        # One court object: an opening "{" and closing "}," indented by four
        # spaces, with up to 100 lines in between.
        self.court_regex = r"(^\s{4}?{)((.*\n){1,100}?)(\s{4}?},)"
        self.id_regex = r'"id": ("(?P<id>.*)"|null)'

    def test_json(self):
        """Does our json load properly, and if not where are the issues"""
        json_path = os.path.join(db_root, "data", "courts.json")
        with open(json_path, "r", encoding="utf-8") as f:
            data = f.read()

        try:
            # Load entire json to shortcircuit testing
            json.loads(data)
            return
        except JSONDecodeError as exc:
            file_error = exc

        # The file as a whole is broken: parse each court on its own so the
        # failure message can name the offending entries.
        problems = []
        for court_match in re.finditer(self.court_regex, data, re.MULTILINE):
            court = court_match.group().strip(",")
            try:
                # Load individual courts
                json.loads(court)
                continue
            except JSONDecodeError:
                pass
            id_match = re.search(self.id_regex, court)
            name_match = re.search(self.name_regex, court)
            court_id = id_match.group("id") if id_match else None
            name = name_match.group("name") if name_match else None
            problems.append(f"Issues with ({court_id}) -- {name}")

        details = "\n".join(problems) or (
            "no individual court entry could be isolated"
        )
        self.fail(
            f"Errors exist in the data structure ({file_error}):\n{details}"
        )

    def test_unique_ids(self):
        """Are all court ids unique?"""
        id_counts = Counter(row["id"] for row in load_courts_db())
        duplicates = {
            court_id: count
            for court_id, count in id_counts.items()
            if count > 1
        }
        self.assertEqual(
            duplicates, {}, msg=f"Duplicate court ids: {duplicates}"
        )

    def test_json_keys(self):
        """Does every court have a citation string?"""
        # A missing key and an explicit null both count as "no citation".
        missing = [
            row["id"]
            for row in load_courts_db()
            if row.get("citation_string") is None
        ]
        self.assertEqual(len(missing), 0, msg=missing)

    def test_id_length(self):
        """Make sure Id length does not exceed 15 characters"""
        too_long = [
            row["id"] for row in load_courts_db() if len(row["id"]) > 15
        ]
        self.assertEqual(
            too_long,
            [],
            msg=(
                "Ids are longer than 15 characters. This is not allowed. "
                "Please update the id to be 15 characters or less. "
                f"#{len(too_long)}: Ids longer than 15: {too_long}"
            ),
        )
class LazyLoadTest(TestCase):
    """Checks that ``courts_db`` only materialises its data attributes on use."""

    def test_lazy_load(self):
        """Each lazy attribute should only exist after it is first used."""
        # Reset the courts_db module in case it was already loaded by another
        # test. The default argument keeps this from raising KeyError when the
        # module is not in sys.modules yet.
        sys.modules.pop("courts_db", None)
        import courts_db

        for attr in ("courts", "court_dict", "regexes"):
            # Not listed before first access ...
            self.assertNotIn(attr, dir(courts_db))
            # ... accessing it yields a value ...
            self.assertIsNotNone(getattr(courts_db, attr, None))
            # ... and afterwards it is listed on the module.
            self.assertIn(attr, dir(courts_db))
class JSONBuildTest(TestCase):
    """Base class that preloads a JSON data file for structure tests."""

    # File name, inside courts_db/data next to this file, of the JSON to load.
    json_name = "courts.json"

    @classmethod
    def setUpClass(cls) -> None:
        """Preload the JSON file: its path, its raw text and its parsed form."""
        cls.json_path = (
            Path(__file__).parent / "courts_db" / "data" / cls.json_name
        )
        # Read explicitly as UTF-8 rather than the locale default, so any
        # non-ASCII text in the file decodes the same on every platform.
        cls.json_str = cls.json_path.read_text(encoding="utf-8")
        cls.json = json.loads(cls.json_str)

    def setUp(self):
        """Store the result of ``load_courts_db()`` on ``self.courts``."""
        self.courts = load_courts_db()
class StructureTest(JSONBuildTest):
    """Checks the on-disk formatting of the JSON data file."""

    def test_json_format(self):
        """Does format of json file match json.dumps(json.loads(), sort_keys=True)?

        Side effect: when the file differs from the canonical form (4-space
        indent, sorted keys, non-ASCII kept as-is, trailing newline) it is
        rewritten in place and the test fails, so a second run passes.
        """
        reformatted = json.dumps(
            self.json,
            indent=4,
            ensure_ascii=False,
            sort_keys=True,
        )
        reformatted += "\n"
        if self.json_str != reformatted:
            # ensure_ascii=False leaves non-ASCII characters unescaped, so
            # write explicitly as UTF-8 rather than the locale default.
            self.json_path.write_text(reformatted, encoding="utf-8")
            self.fail("JSON file is not formatted correctly, Fixing...")
# Run the whole suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()