regex_search_string: whole match objects and multiple results

netiawifi
selfisekai 2020-09-20 18:19:48 +02:00
parent 5ad75cb94c
commit 465f8ed5c0
2 changed files with 54 additions and 13 deletions

View File

@ -89,13 +89,25 @@ def get_email_address():
])
def regex_search_string(regexes, string, default=None,
                        multiple=False, whole_match=False):
    """Search *string* with one or more regular expressions.

    Args:
        regexes: A single pattern, or a list of patterns tried in order.
        string: The text to search.
        default: Value returned when ``multiple`` is false and no pattern
            matches.
        multiple: When true, collect every match of every pattern (via
            ``re.finditer``) and return them as a list; the list is empty
            when nothing matches (``default`` is not used in this mode).
        whole_match: When true, return the match object(s) themselves
            instead of the text of capture group 1.

    Returns:
        With ``multiple`` false: the first match found (group 1 or the
        match object), or ``default``. With ``multiple`` true: a list of
        all matches, in pattern order and then in order of appearance.

    Raises:
        IndexError: If ``whole_match`` is false and a matching pattern has
            no capture group 1 (raised by ``match.group(1)``).
    """
    if not isinstance(regexes, list):
        regexes = [regexes]
    results = []
    for regex in regexes:
        if multiple:
            # Gather every occurrence; keep going through all patterns.
            results.extend(
                match if whole_match else match.group(1)
                for match in re.finditer(regex, string)
            )
        else:
            match = re.search(regex, string)
            if match:
                # First hit wins in single-result mode.
                return match if whole_match else match.group(1)
    if multiple:
        return results
    return default

View File

@ -3,31 +3,34 @@ from librefi.utils import regex_search_string
# HTML fixture that the tests in this file pass to regex_search_string.
HTML_STRING = """
<form method="POST" action="/?your=mother">
<input type="hidden" value="tesco" />
<input type="hidden" name="twoja" value="stara" />
</form>
"""
# NOTE(review): PATTERN / EXPECTED_RESULT carry the same values as
# PATTERN_1 / EXPECTED_RESULT_1 below; presumably they are the pre-rename
# names left over from the diff view -- confirm before removing either pair.
PATTERN = r'<form [^>]+action="([^"]+)'
# Captures the value of the <form> tag's action attribute (group 1).
PATTERN_1 = r'<form [^>]+action="([^"]+)'
EXPECTED_RESULT_1 = "/?your=mother"
EXPECTED_RESULT = "/?your=mother"
# Captures the value attribute of each <input> tag (group 1); the fixture
# contains two such tags, in the order listed in EXPECTED_RESULT_2.
PATTERN_2 = r'<input [^>]*value="([^"]+)"'
EXPECTED_RESULT_2 = ["tesco", "stara"]
def test_regex_search_single_regex():
    """A single pattern (not wrapped in a list) returns capture group 1."""
    result = regex_search_string(PATTERN_1, HTML_STRING)
    assert result is not None
    assert result == EXPECTED_RESULT_1
def test_regex_search_multiple_regexes():
    """With a list of patterns, the first one that matches supplies the result."""
    result = regex_search_string(
        [r'dupsko', PATTERN_1, r'byle jakie'], HTML_STRING)
    assert result is not None
    assert result == EXPECTED_RESULT_1
def test_regex_search_invalid():
    """When no pattern matches and no default is given, the result is None."""
    result = regex_search_string([r'dupsko', r'czorne'], HTML_STRING)
    assert result is None
    assert result != EXPECTED_RESULT_1
def test_regex_search_default():
@ -36,3 +39,29 @@ def test_regex_search_default():
[r'dupsko', r'czorne'], HTML_STRING, default=default)
assert result is not None
assert result == default
def test_regex_search_multiple_results():
    """multiple=True returns a list with group 1 of every match, in order."""
    results = regex_search_string(PATTERN_2, HTML_STRING, multiple=True)
    assert results is not None
    assert isinstance(results, list)
    # Length is checked first because zip() would silently stop at the
    # shorter sequence.
    assert len(results) == len(EXPECTED_RESULT_2)
    for result, expected in zip(results, EXPECTED_RESULT_2):
        assert isinstance(result, str)
        assert result == expected
def test_regex_search_whole_single_match():
    """whole_match=True returns the match object rather than group 1's text."""
    result = regex_search_string(PATTERN_1, HTML_STRING, whole_match=True)
    assert result is not None
    assert result.group(1) == EXPECTED_RESULT_1
def test_regex_search_multiple_whole_matches():
    """multiple=True with whole_match=True returns a list of match objects."""
    results = regex_search_string(
        PATTERN_2, HTML_STRING, multiple=True, whole_match=True)
    assert results is not None
    assert isinstance(results, list)
    # Length is checked first because zip() would silently stop at the
    # shorter sequence.
    assert len(results) == len(EXPECTED_RESULT_2)
    for match, expected in zip(results, EXPECTED_RESULT_2):
        assert match.group(1) == expected