regex_search_string: whole match objects and multiple results
parent
5ad75cb94c
commit
465f8ed5c0
|
@ -89,13 +89,25 @@ def get_email_address():
|
|||
])
|
||||
|
||||
|
||||
def regex_search_string(regexes, string, default=None, multiple=False,
                        whole_match=False):
    """Search *string* with one or more regular expressions.

    Args:
        regexes: A single pattern, or a list/tuple of patterns that are
            tried in order.
        string: The text to search.
        default: Value returned in single-result mode when nothing matches.
        multiple: When true, collect every match of every pattern (via
            ``re.finditer``) and return them as a list. The list is empty
            when nothing matches; ``default`` is not used in this mode.
        whole_match: When true, return ``re.Match`` objects instead of the
            captured text.

    Returns:
        In single-result mode, the result for the first pattern that
        matches, or ``default``. In ``multiple`` mode, a list of results.
        Unless ``whole_match`` is set, a result is the text of capture
        group 1, or the whole matched text when the pattern defines no
        capture group.
    """
    # Tuples are accepted alongside lists; anything else is one pattern.
    if not isinstance(regexes, (list, tuple)):
        regexes = [regexes]

    def _extract(match):
        if whole_match:
            return match
        # A pattern without capture groups has no group 1; fall back to
        # the whole matched text instead of raising IndexError.
        return match.group(1 if match.re.groups else 0)

    if multiple:
        return [
            _extract(match)
            for regex in regexes
            for match in re.finditer(regex, string)
        ]

    for regex in regexes:
        match = re.search(regex, string)
        if match:
            return _extract(match)
    return default
|
||||
|
||||
|
||||
|
|
|
@ -3,31 +3,34 @@ from librefi.utils import regex_search_string
|
|||
# Minimal HTML fixture: one form containing two hidden inputs.
HTML_STRING = """
<form method="POST" action="/?your=mother">
<input type="hidden" value="tesco" />
<input type="hidden" name="twoja" value="stara" />
</form>
"""

# Captures the form's action attribute.
PATTERN_1 = r'<form [^>]+action="([^"]+)'
EXPECTED_RESULT_1 = "/?your=mother"

# Captures the value attribute of each <input> element.
PATTERN_2 = r'<input [^>]*value="([^"]+)"'
EXPECTED_RESULT_2 = ["tesco", "stara"]
|
||||
|
||||
|
||||
def test_regex_search_single_regex():
    """A lone pattern (not wrapped in a list) returns the captured text."""
    action = regex_search_string(PATTERN_1, HTML_STRING)
    assert action is not None
    assert action == EXPECTED_RESULT_1
|
||||
|
||||
|
||||
def test_regex_search_multiple_regexes():
    """Patterns that do not match are skipped until one in the list does."""
    candidates = [r'dupsko', PATTERN_1, r'byle jakie']
    found = regex_search_string(candidates, HTML_STRING)
    assert found is not None
    assert found == EXPECTED_RESULT_1
|
||||
|
||||
|
||||
def test_regex_search_invalid():
    """With no matching pattern and no default given, the result is None."""
    missing = regex_search_string([r'dupsko', r'czorne'], HTML_STRING)
    assert missing is None
    assert missing != EXPECTED_RESULT_1
|
||||
|
||||
|
||||
def test_regex_search_default():
|
||||
|
@ -36,3 +39,29 @@ def test_regex_search_default():
|
|||
[r'dupsko', r'czorne'], HTML_STRING, default=default)
|
||||
assert result is not None
|
||||
assert result == default
|
||||
|
||||
|
||||
def test_regex_search_multiple_results():
    """multiple=True returns the captured text of every match as a list."""
    results = regex_search_string(PATTERN_2, HTML_STRING, multiple=True)
    assert results is not None
    assert isinstance(results, list)
    # Lengths are compared first so zip() below cannot hide a missing item.
    assert len(results) == len(EXPECTED_RESULT_2)
    for actual, expected in zip(results, EXPECTED_RESULT_2):
        assert isinstance(actual, str)
        assert actual == expected
|
||||
|
||||
|
||||
def test_regex_search_whole_single_match():
    """whole_match=True returns the match object instead of group 1."""
    match = regex_search_string(PATTERN_1, HTML_STRING, whole_match=True)
    assert match is not None
    assert match.group(1) == EXPECTED_RESULT_1
|
||||
|
||||
|
||||
def test_regex_search_multiple_whole_matches():
    """multiple=True with whole_match=True returns a list of match objects."""
    results = regex_search_string(
        PATTERN_2, HTML_STRING, multiple=True, whole_match=True)
    assert results is not None
    assert isinstance(results, list)
    # Lengths are compared first so zip() below cannot hide a missing item.
    assert len(results) == len(EXPECTED_RESULT_2)
    for match, expected in zip(results, EXPECTED_RESULT_2):
        assert match.group(1) == expected
|
||||
|
|
Loading…
Reference in New Issue