Add virStringSearch method for regex matching

Add a virStringSearch method to virstring.{c,h} which performs
a regex match against a string and returns the matching substrings.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
This commit is contained in:
Manuel VIVES 2014-01-23 10:28:29 +01:00 committed by Daniel P. Berrange
parent 68954fb25c
commit 12aa71dfde
5 changed files with 213 additions and 0 deletions

View File

@ -192,6 +192,7 @@ src/util/virscsi.c
src/util/virsocketaddr.c
src/util/virstatslinux.c
src/util/virstoragefile.c
src/util/virstring.c
src/util/virsysinfo.c
src/util/virerror.c
src/util/virerror.h

View File

@ -1798,6 +1798,7 @@ virStringArrayHasString;
virStringFreeList;
virStringJoin;
virStringListLength;
virStringSearch;
virStringSortCompare;
virStringSortRevCompare;
virStringSplit;

View File

@ -23,12 +23,14 @@
#include <stdlib.h>
#include <stdio.h>
#include <regex.h>
#include "c-ctype.h"
#include "virstring.h"
#include "viralloc.h"
#include "virbuffer.h"
#include "virerror.h"
#include "virlog.h"
#define VIR_FROM_THIS VIR_FROM_NONE
@ -645,3 +647,105 @@ int virStringSortRevCompare(const void *a, const void *b)
return strcmp(*sb, *sa);
}
/**
 * virStringSearch:
 * @str: string to search
 * @regexp: POSIX Extended regular expression pattern used for matching
 * @max_matches: maximum number of substrings to return
 * @matches: pointer to an array to be filled with NULL terminated list of matches
 *
 * Performs a POSIX extended regex search against a string and returns
 * all matching substrings. @regexp must contain exactly one match group,
 * otherwise an error is reported. Each returned string is the text
 * spanned by one successful match of @regexp; the search then resumes
 * immediately after that match.
 *
 * A zero-length match is recorded once and the search then moves forward
 * by one character, so a pattern that can match the empty string cannot
 * stall the search on the same position.
 *
 * The @matches value should be freed with virStringFreeList() when no longer
 * required. On error @matches is set to NULL.
 *
 * @code
 *  char *source = "6853a496-1c10-472e-867a-8244937bd6f0
 *                  773ab075-4cd7-4fc2-8b6e-21c84e9cb391
 *                  bbb3c75c-d60f-43b0-b802-fd56b84a4222
 *                  60c04aa1-0375-4654-8d9f-e149d9885273
 *                  4548d465-9891-4c34-a184-3b1c34a26aa8";
 *  char **matches = NULL;
 *  virStringSearch(source,
 *                  "([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})",
 *                  3,
 *                  &matches);
 *
 *  // matches[0] == "6853a496-1c10-472e-867a-8244937bd6f0";
 *  // matches[1] == "773ab075-4cd7-4fc2-8b6e-21c84e9cb391";
 *  // matches[2] == "bbb3c75c-d60f-43b0-b802-fd56b84a4222"
 *  // matches[3] == NULL;
 *
 *  virStringFreeList(matches);
 * @endcode
 *
 * Returns: -1 on error, or number of matches
 */
ssize_t
virStringSearch(const char *str,
                const char *regexp,
                size_t max_matches,
                char ***matches)
{
    regex_t re;
    regmatch_t rem;
    size_t nmatches = 0;
    ssize_t ret = -1;
    int rv = -1;

    *matches = NULL;

    VIR_DEBUG("search '%s' for '%s'", str, regexp);

    if ((rv = regcomp(&re, regexp, REG_EXTENDED)) != 0) {
        char error[100];
        regerror(rv, &re, error, sizeof(error));
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Error while compiling regular expression '%s': %s"),
                       regexp, error);
        /* regcomp failed, so there is no compiled pattern to release */
        return -1;
    }

    if (re.re_nsub != 1) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Regular expression '%s' must have exactly 1 match group, not %zu"),
                       regexp, re.re_nsub);
        goto cleanup;
    }

    /* '*matches' must always be NULL terminated in every iteration
     * of the loop, so start by allocating 1 element
     */
    if (VIR_EXPAND_N(*matches, nmatches, 1) < 0)
        goto cleanup;

    /* nmatches counts the trailing NULL slot, hence the '- 1' */
    while ((nmatches - 1) < max_matches) {
        char *match;

        if (regexec(&re, str, 1, &rem, 0) != 0)
            break;

        if (VIR_EXPAND_N(*matches, nmatches, 1) < 0)
            goto cleanup;

        if (VIR_STRNDUP(match, str + rem.rm_so,
                        rem.rm_eo - rem.rm_so) < 0)
            goto cleanup;

        VIR_DEBUG("Got '%s'", match);

        /* Last slot stays NULL as terminator; fill the one before it */
        (*matches)[nmatches-2] = match;

        if (rem.rm_eo == rem.rm_so) {
            /* Zero-length match: resuming at rm_eo would find the very
             * same empty match again, so step over one character, or
             * stop if the end of the string has been reached.
             */
            if (str[rem.rm_eo] == '\0')
                break;
            str = str + rem.rm_eo + 1;
        } else {
            str = str + rem.rm_eo;
        }
    }

    ret = nmatches - 1; /* don't count the trailing null */

 cleanup:
    regfree(&re);
    if (ret < 0) {
        virStringFreeList(*matches);
        *matches = NULL;
    }
    return ret;
}

View File

@ -226,4 +226,11 @@ size_t virStringListLength(char **strings);
int virStringSortCompare(const void *a, const void *b);
int virStringSortRevCompare(const void *a, const void *b);
/* Search @str for matches of the POSIX extended regex @regexp and store
 * up to @max_matches matching substrings as a NULL terminated list in
 * @matches. Returns the number of matches, or -1 on error.
 */
ssize_t virStringSearch(const char *str,
                        const char *regexp,
                        size_t max_matches,
                        char ***matches)
    ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
#endif /* __VIR_STRING_H__ */

View File

@ -274,6 +274,70 @@ testStringSortCompare(const void *opaque ATTRIBUTE_UNUSED)
}
/* Describes a single virStringSearch() test case: the inputs handed to
 * the function and the outcome the test expects.
 */
struct stringSearchData {
    const char *str;             /* string to be searched */
    const char *regexp;          /* pattern passed to virStringSearch() */
    size_t maxMatches;           /* limit passed as max_matches */
    size_t expectNMatches;       /* expected return value on success */
    const char **expectMatches;  /* expected substrings, compared in order */
    bool expectError;            /* true if the call is expected to return -1 */
};
static int
testStringSearch(const void *opaque ATTRIBUTE_UNUSED)
{
const struct stringSearchData *data = opaque;
char **matches = NULL;
ssize_t nmatches;
int ret = -1;
nmatches = virStringSearch(data->str, data->regexp,
data->maxMatches, &matches);
if (data->expectError) {
if (nmatches != -1) {
fprintf(stderr, "expected error on %s but got %zd matches\n",
data->str, nmatches);
goto cleanup;
}
} else {
size_t i;
if (nmatches < 0) {
fprintf(stderr, "expected %zu matches on %s but got error\n",
data->expectNMatches, data->str);
goto cleanup;
}
if (nmatches != data->expectNMatches) {
fprintf(stderr, "expected %zu matches on %s but got %zd\n",
data->expectNMatches, data->str, nmatches);
goto cleanup;
}
if (virStringListLength(matches) != nmatches) {
fprintf(stderr, "expected %zu matches on %s but got %zd matches\n",
data->expectNMatches, data->str,
virStringListLength(matches));
goto cleanup;
}
for (i = 0; i < nmatches; i++) {
if (STRNEQ(matches[i], data->expectMatches[i])) {
fprintf(stderr, "match %zu expected '%s' but got '%s'\n",
i, data->expectMatches[i], matches[i]);
goto cleanup;
}
}
}
ret = 0;
cleanup:
virStringFreeList(matches);
return ret;
}
static int
mymain(void)
{
@ -328,6 +392,42 @@ mymain(void)
if (virtTestRun("virStringSortCompare", testStringSortCompare, NULL) < 0)
ret = -1;
/* Run one virStringSearch test case through virtTestRun():
 *   s - string to search; it is pasted onto the test name, so it has to
 *       be a string literal
 *   r - regular expression
 *   x - maximum number of matches to request
 *   n - expected number of matches
 *   m - expected matches (array of strings, or NULL)
 *   e - true if the search is expected to fail
 * Sets the enclosing function's 'ret' to -1 when the test run fails.
 */
#define TEST_SEARCH(s, r, x, n, m, e)                                   \
    do {                                                                \
        struct stringSearchData data = {                                \
            .str = s,                                                   \
            .regexp = r,                                                \
            .maxMatches = x,                                            \
            .expectNMatches = n,                                        \
            .expectMatches = m,                                         \
            .expectError = e,                                           \
        };                                                              \
        if (virtTestRun("virStringSearch " s,                           \
                        testStringSearch, &data) < 0)                   \
            ret = -1;                                                   \
    } while (0)
/* error due to missing () in regexp */
TEST_SEARCH("foo", "bar", 10, 0, NULL, true);
/* error due to too many () in regexp */
TEST_SEARCH("foo", "(b)(a)(r)", 10, 0, NULL, true);
/* None matching */
TEST_SEARCH("foo", "(bar)", 10, 0, NULL, false);
/* Full match */
const char *matches1[] = { "foo" };
TEST_SEARCH("foo", "(foo)", 10, 1, matches1, false);
/* Multi matches */
const char *matches2[] = { "foo", "bar", "eek" };
TEST_SEARCH("1foo2bar3eek", "([a-z]+)", 10, 3, matches2, false);
/* Multi matches, limited returns */
const char *matches3[] = { "foo", "bar" };
TEST_SEARCH("1foo2bar3eek", "([a-z]+)", 2, 2, matches3, false);
return ret==0 ? EXIT_SUCCESS : EXIT_FAILURE;
}