diff --git a/librefi/utils.py b/librefi/utils.py index 4b2cd6a..dba11f6 100644 --- a/librefi/utils.py +++ b/librefi/utils.py @@ -89,13 +89,25 @@ def get_email_address(): ]) -def regex_search_string(regexes, string, default=None): +def regex_search_string(regexes, string, default=None, multiple=False, whole_match=False): if not isinstance(regexes, list): regexes = [regexes] + results = [] for regex in regexes: - match = re.search(regex, string) - if match: - return match.group(1) + if multiple: + matches = re.finditer(regex, string) + for match in matches: + if not whole_match: + match = match.group(1) + results.append(match) + else: + match = re.search(regex, string) + if match: + if not whole_match: + match = match.group(1) + return match + if multiple: + return results return default diff --git a/tests/regex_utils.py b/tests/regex_utils.py index 0cf632e..c0fdc2e 100644 --- a/tests/regex_utils.py +++ b/tests/regex_utils.py @@ -3,31 +3,34 @@ from librefi.utils import regex_search_string HTML_STRING = """
+
""" -PATTERN = r'
]+action="([^"]+)' +PATTERN_1 = r']+action="([^"]+)' +EXPECTED_RESULT_1 = "/?your=mother" -EXPECTED_RESULT = "/?your=mother" +PATTERN_2 = r']*value="([^"]+)"' +EXPECTED_RESULT_2 = ["tesco", "stara"] -def test_regex_search_single(): - result = regex_search_string(PATTERN, HTML_STRING) +def test_regex_search_single_regex(): + result = regex_search_string(PATTERN_1, HTML_STRING) assert result is not None - assert result == EXPECTED_RESULT + assert result == EXPECTED_RESULT_1 -def test_regex_search_multiple(): +def test_regex_search_multiple_regexes(): result = regex_search_string( - [r'dupsko', PATTERN, r'byle jakie'], HTML_STRING) + [r'dupsko', PATTERN_1, r'byle jakie'], HTML_STRING) assert result is not None - assert result == EXPECTED_RESULT + assert result == EXPECTED_RESULT_1 def test_regex_search_invalid(): result = regex_search_string([r'dupsko', r'czorne'], HTML_STRING) assert result is None - assert result != EXPECTED_RESULT + assert result != EXPECTED_RESULT_1 def test_regex_search_default(): @@ -36,3 +39,29 @@ def test_regex_search_default(): [r'dupsko', r'czorne'], HTML_STRING, default=default) assert result is not None assert result == default + + +def test_regex_search_multiple_results(): + results = regex_search_string(PATTERN_2, HTML_STRING, multiple=True) + assert results is not None + assert isinstance(results, list) + assert len(results) == len(EXPECTED_RESULT_2) + for i in range(len(results)): + assert isinstance(results[i], str) + assert results[i] == EXPECTED_RESULT_2[i] + + +def test_regex_search_whole_single_match(): + result = regex_search_string(PATTERN_1, HTML_STRING, whole_match=True) + assert result is not None + assert result.group(1) == EXPECTED_RESULT_1 + + +def test_regex_search_multiple_whole_matches(): + results = regex_search_string( + PATTERN_2, HTML_STRING, multiple=True, whole_match=True) + assert results is not None + assert isinstance(results, list) + assert len(results) == len(EXPECTED_RESULT_2) + for i in range(len(results)): + assert results[i].group(1) == EXPECTED_RESULT_2[i]