regex_search_string: whole match objects and multiple results
This commit is contained in:
parent
5ad75cb94c
commit
465f8ed5c0
|
@ -89,13 +89,25 @@ def get_email_address():
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
def regex_search_string(regexes, string, default=None, multiple=False, whole_match=False):
    """Search *string* with one or more regular expressions.

    Args:
        regexes: A single pattern, or a list/tuple of patterns tried in
            the order given.
        string: The text to search.
        default: Value returned in single-result mode when no pattern
            matches. It is not used when ``multiple`` is true.
        multiple: When true, collect every match of every pattern (using
            ``re.finditer``) and return them all as a list.
        whole_match: When true, return ``re.Match`` objects instead of
            extracted text.

    Returns:
        Single-result mode: the result for the first pattern that matches,
        or ``default`` when none does.
        Multiple mode: a list of results, empty when nothing matched.
        Unless ``whole_match`` is set, a result is the text of the first
        capturing group, or the whole matched text when the pattern
        defines no capturing group.
    """

    def _extract(match):
        # Callers asking for whole matches get the Match object untouched.
        if whole_match:
            return match
        # A pattern without any capturing group has no group 1; return the
        # full matched text instead of raising IndexError.
        return match.group(1) if match.re.groups else match.group(0)

    # A bare pattern is wrapped so the code below can always iterate.
    if not isinstance(regexes, (list, tuple)):
        regexes = [regexes]

    if multiple:
        return [
            _extract(match)
            for regex in regexes
            for match in re.finditer(regex, string)
        ]

    for regex in regexes:
        match = re.search(regex, string)
        if match:
            return _extract(match)
    return default
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,31 +3,34 @@ from librefi.utils import regex_search_string
|
||||||
# Sample markup shared by every test below: one form with two hidden inputs.
HTML_STRING = """
<form method="POST" action="/?your=mother">
<input type="hidden" value="tesco" />
<input type="hidden" name="twoja" value="stara" />
</form>
"""

# Captures the form's ``action`` attribute (exactly one match in HTML_STRING).
PATTERN_1 = r'<form [^>]+action="([^"]+)'
EXPECTED_RESULT_1 = "/?your=mother"

# Captures the ``value`` attribute of each <input> (two matches in HTML_STRING).
PATTERN_2 = r'<input [^>]*value="([^"]+)"'
EXPECTED_RESULT_2 = ["tesco", "stara"]
|
||||||
|
|
||||||
|
|
||||||
def test_regex_search_single_regex():
    """A bare pattern (not wrapped in a list) yields the captured text."""
    action = regex_search_string(PATTERN_1, HTML_STRING)
    assert action is not None
    assert action == EXPECTED_RESULT_1
|
||||||
|
|
||||||
|
|
||||||
def test_regex_search_multiple_regexes():
    """Patterns that do not match are skipped until one in the list does."""
    candidates = [r'dupsko', PATTERN_1, r'byle jakie']
    action = regex_search_string(candidates, HTML_STRING)
    assert action is not None
    assert action == EXPECTED_RESULT_1
|
||||||
|
|
||||||
|
|
||||||
def test_regex_search_invalid():
    """With no matching pattern and no explicit default, the result is None."""
    non_matching = [r'dupsko', r'czorne']
    outcome = regex_search_string(non_matching, HTML_STRING)
    assert outcome is None
    assert outcome != EXPECTED_RESULT_1
|
||||||
|
|
||||||
|
|
||||||
def test_regex_search_default():
|
def test_regex_search_default():
|
||||||
|
@ -36,3 +39,29 @@ def test_regex_search_default():
|
||||||
[r'dupsko', r'czorne'], HTML_STRING, default=default)
|
[r'dupsko', r'czorne'], HTML_STRING, default=default)
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert result == default
|
assert result == default
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_search_multiple_results():
    """``multiple=True`` returns a list holding every captured string."""
    results = regex_search_string(PATTERN_2, HTML_STRING, multiple=True)
    assert results is not None
    assert isinstance(results, list)
    # The length check comes first because zip() would silently stop at the
    # shorter sequence and hide a missing or extra result.
    assert len(results) == len(EXPECTED_RESULT_2)
    for actual, expected in zip(results, EXPECTED_RESULT_2):
        assert isinstance(actual, str)
        assert actual == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_search_whole_single_match():
    """``whole_match=True`` hands back an object whose group 1 is the capture."""
    found = regex_search_string(PATTERN_1, HTML_STRING, whole_match=True)
    assert found is not None
    captured = found.group(1)
    assert captured == EXPECTED_RESULT_1
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_search_multiple_whole_matches():
    """``multiple`` plus ``whole_match`` returns a list of match objects."""
    results = regex_search_string(
        PATTERN_2, HTML_STRING, multiple=True, whole_match=True)
    assert results is not None
    assert isinstance(results, list)
    # The length check comes first because zip() would silently stop at the
    # shorter sequence and hide a missing or extra result.
    assert len(results) == len(EXPECTED_RESULT_2)
    for match, expected in zip(results, EXPECTED_RESULT_2):
        assert match.group(1) == expected
|
||||||
|
|
Loading…
Reference in a new issue