Line Flow Count Block(s) Source
1 - /*
2 - * Copyright (C) the libgit2 contributors. All rights reserved.
3 - *
4 - * This file is part of libgit2, distributed under the GNU GPL v2 with
5 - * a Linking Exception. For full terms see the included COPYING file.
6 - *
7 - * Do shell-style pattern matching for ?, \, [], and * characters.
8 - * It is 8bit clean.
9 - *
10 - * Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
11 - * Rich $alz is now <rsalz@bbn.com>.
12 - *
13 - * Modified by Wayne Davison to special-case '/' matching, to make '**'
14 - * work differently than '*', and to fix the character-class code.
15 - *
16 - * Imported from git.git.
17 - */
18 -
19 - #include "wildmatch.h"
20 -
21 - #define GIT_SPACE 0x01
22 - #define GIT_DIGIT 0x02
23 - #define GIT_ALPHA 0x04
24 - #define GIT_GLOB_SPECIAL 0x08
25 - #define GIT_REGEX_SPECIAL 0x10
26 - #define GIT_PATHSPEC_MAGIC 0x20
27 - #define GIT_CNTRL 0x40
28 - #define GIT_PUNCT 0x80
29 -
30 - enum {
31 - S = GIT_SPACE,
32 - A = GIT_ALPHA,
33 - D = GIT_DIGIT,
34 - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
35 - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
36 - P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
37 - X = GIT_CNTRL,
38 - U = GIT_PUNCT,
39 - Z = GIT_CNTRL | GIT_SPACE
40 - };
41 -
42 - static const unsigned char sane_ctype[256] = {
43 - X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
44 - X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
45 - S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
46 - D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
47 - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
48 - A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
49 - P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
50 - A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
51 - /* Nothing in the 128.. range */
52 - };
53 -
54 - #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
55 - #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
56 -
57 - typedef unsigned char uchar;
58 -
59 - /* What character marks an inverted character class? */
60 - #define NEGATE_CLASS '!'
61 - #define NEGATE_CLASS2 '^'
62 -
63 - #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
64 - && *(class) == *(litmatch) \
65 - && strncmp((char*)class, litmatch, len) == 0)
66 -
67 - #if defined STDC_HEADERS || !defined isascii
68 - # define ISASCII(c) 1
69 - #else
70 - # define ISASCII(c) isascii(c)
71 - #endif
72 -
73 - #ifdef isblank
74 - # define ISBLANK(c) (ISASCII(c) && isblank(c))
75 - #else
76 - # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
77 - #endif
78 -
79 - #ifdef isgraph
80 - # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
81 - #else
82 - # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
83 - #endif
84 -
85 - #define ISPRINT(c) (ISASCII(c) && isprint(c))
86 - #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
87 - #define ISALNUM(c) (ISASCII(c) && isalnum(c))
88 - #define ISALPHA(c) (ISASCII(c) && isalpha(c))
89 - #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
90 - #define ISLOWER(c) (ISASCII(c) && islower(c))
91 - #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
92 - #define ISSPACE(c) (ISASCII(c) && isspace(c))
93 - #define ISUPPER(c) (ISASCII(c) && isupper(c))
94 - #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
95 -
96 - /* Match pattern "p" against "text" */
97 351017 2 static int dowild(const uchar *p, const uchar *text, unsigned int flags)
98 - {
99 - uchar p_ch;
100 351017 2 const uchar *pattern = p;
101 -
102 681476 2,241,242 for ( ; (p_ch = *p) != '\0'; text++, p++) {
103 - int matched, match_slash, negated;
104 - uchar t_ch, prev_ch;
105 678533 3,4 if ((t_ch = *text) == '\0' && p_ch != '*')
106 8351 5 return WM_ABORT_ALL;
107 670182 6-9 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
108 70 10 t_ch = tolower(t_ch);
109 670182 11-14 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
110 193 15 p_ch = tolower(p_ch);
111 670182 16 switch (p_ch) {
112 - case '\\':
113 - /* Literal match with following character. Note that the test
114 - * in "default" handles the p[1] == '\0' failure case. */
115 92 17 p_ch = *++p;
116 - /* FALLTHROUGH */
117 - default:
118 643973 18 if (t_ch != p_ch)
119 314094 19 return WM_NOMATCH;
120 329879 20 continue;
121 - case '?':
122 - /* Match anything but '/'. */
123 54 21,22 if ((flags & WM_PATHNAME) && t_ch == '/')
124 4 23 return WM_NOMATCH;
125 50 24 continue;
126 - case '*':
127 25595 25 if (*++p == '*') {
128 313 26 const uchar *prev_p = p - 2;
129 313 26,27 while (*++p == '*') {}
130 313 28 if (!(flags & WM_PATHNAME))
131 - /* without WM_PATHNAME, '*' == '**' */
132 91 29 match_slash = 1;
133 222 30-32 else if ((prev_p < pattern || *prev_p == '/') &&
134 214 32-34 (*p == '\0' || *p == '/' ||
135 8 34,35 (p[0] == '\\' && p[1] == '/'))) {
136 - /*
137 - * Assuming we already match 'foo/' and are at
138 - * <star star slash>, just assume it matches
139 - * nothing and go ahead match the rest of the
140 - * pattern with the remaining string. This
141 - * helps make foo/<*><*>/bar (<> because
142 - * otherwise it breaks C comment syntax) match
143 - * both foo/bar and foo/a/bar.
144 - */
145 206 36,38 if (p[0] == '/' &&
146 177 37 dowild(p + 1, text, flags) == WM_MATCH)
147 22 39 return WM_MATCH;
148 184 40 match_slash = 1;
149 - } else /* WM_PATHNAME is set */
150 291 41,42 match_slash = 0;
151 - } else
152 - /* without WM_PATHNAME, '*' == '**' */
153 25282 43 match_slash = flags & WM_PATHNAME ? 0 : 1;
154 25573 44 if (*p == '\0') {
155 - /* Trailing "**" matches everything. Trailing "*" matches
156 - * only if there are no more slash characters. */
157 18423 45 if (!match_slash) {
158 153 46 if (strchr((char*)text, '/') != NULL)
159 75 47 return WM_NOMATCH;
160 - }
161 18348 48 return WM_MATCH;
162 7150 49,50 } else if (!match_slash && *p == '/') {
163 - /*
164 - * _one_ asterisk followed by a slash
165 - * with WM_PATHNAME matches the next
166 - * directory
167 - */
168 166 51 const char *slash = strchr((char*)text, '/');
169 166 51 if (!slash)
170 20 52 return WM_NOMATCH;
171 146 53 text = (const uchar*)slash;
172 - /* the slash is consumed by the top-level for loop */
173 146 53 break;
174 - }
175 - while (1) {
176 11841 54 if (t_ch == '\0')
177 188 55 break;
178 - /*
179 - * Try to advance faster when an asterisk is
180 - * followed by a literal. We know in this case
181 - * that the string before the literal
182 - * must belong to "*".
183 - * If match_slash is false, do not look past
184 - * the first slash as it cannot belong to '*'.
185 - */
186 11653 56 if (!is_glob_special(*p)) {
187 11587 57 p_ch = *p;
188 11587 57-60 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
189 12 61 p_ch = tolower(p_ch);
190 66920 62,71,72 while ((t_ch = *text) != '\0' &&
191 8884 73 (match_slash || t_ch != '/')) {
192 61387 63-66 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
193 52 67 t_ch = tolower(t_ch);
194 61387 68 if (t_ch == p_ch)
195 6054 69 break;
196 55333 70 text++;
197 - }
198 11587 74 if (t_ch != p_ch)
199 5533 75 return WM_NOMATCH;
200 - }
201 6120 76,77 if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
202 1263 78,79 if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
203 1263 80 return matched;
204 4857 81,82 } else if (!match_slash && t_ch == '/')
205 ##### 83 return WM_ABORT_TO_STARSTAR;
206 4857 84 t_ch = *++text;
207 4857 84 }
208 188 85 return WM_ABORT_ALL;
209 - case '[':
210 560 86 p_ch = *++p;
211 - #ifdef NEGATE_CLASS2
212 560 86 if (p_ch == NEGATE_CLASS2)
213 52 87 p_ch = NEGATE_CLASS;
214 - #endif
215 - /* Assign literal 1/0 because of "matched" comparison. */
216 560 88 negated = p_ch == NEGATE_CLASS ? 1 : 0;
217 560 88 if (negated) {
218 - /* Inverted character class. */
219 116 89 p_ch = *++p;
220 - }
221 560 90 prev_ch = 0;
222 560 90 matched = 0;
223 - do {
224 1368 91 if (!p_ch)
225 34 92 return WM_ABORT_ALL;
226 1334 93 if (p_ch == '\\') {
227 60 94 p_ch = *++p;
228 60 94 if (!p_ch)
229 ##### 95 return WM_ABORT_ALL;
230 60 96 if (t_ch == p_ch)
231 60 97,98 matched = 1;
232 1274 99-102 } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
233 297 103 p_ch = *++p;
234 297 103 if (p_ch == '\\') {
235 32 104 p_ch = *++p;
236 32 104 if (!p_ch)
237 ##### 105 return WM_ABORT_ALL;
238 - }
239 297 106,107 if (t_ch <= p_ch && t_ch >= prev_ch)
240 147 108 matched = 1;
241 150 109-112 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
242 32 113 uchar t_ch_upper = toupper(t_ch);
243 32 113,114 if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
244 10 115 matched = 1;
245 - }
246 297 116 p_ch = 0; /* This makes "prev_ch" get set to 0. */
247 977 117,118,232 } else if (p_ch == '[' && p[1] == ':') {
248 - const uchar *s;
249 - int i;
250 1624 119-122 for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
251 236 123 if (!p_ch)
252 ##### 124 return WM_ABORT_ALL;
253 236 125 i = (int)(p - s - 1);
254 236 125,126 if (i < 0 || p[-1] != ':') {
255 - /* Didn't find ":]", so treat like a normal set. */
256 8 127 p = s - 2;
257 8 127 p_ch = '[';
258 8 127 if (t_ch == p_ch)
259 8 128 matched = 1;
260 8 129 continue;
261 - }
262 228 130-132 if (CC_EQ(s,i, "alnum")) {
263 8 133-135,137 if (ISALNUM(t_ch))
264 ##### 136 matched = 1;
265 220 138-140 } else if (CC_EQ(s,i, "alpha")) {
266 12 141-143,145 if (ISALPHA(t_ch))
267 4 144 matched = 1;
268 208 146-148 } else if (CC_EQ(s,i, "blank")) {
269 8 149-151,153 if (ISBLANK(t_ch))
270 ##### 152 matched = 1;
271 200 154-156 } else if (CC_EQ(s,i, "cntrl")) {
272 8 157-159,161 if (ISCNTRL(t_ch))
273 ##### 160 matched = 1;
274 192 162-164 } else if (CC_EQ(s,i, "digit")) {
275 56 165-167,169 if (ISDIGIT(t_ch))
276 16 168 matched = 1;
277 136 170-172 } else if (CC_EQ(s,i, "graph")) {
278 4 173-175,177 if (ISGRAPH(t_ch))
279 4 176 matched = 1;
280 132 178-180 } else if (CC_EQ(s,i, "lower")) {
281 16 181-183,185 if (ISLOWER(t_ch))
282 6 184 matched = 1;
283 116 186-188 } else if (CC_EQ(s,i, "print")) {
284 4 189-191,193 if (ISPRINT(t_ch))
285 4 192 matched = 1;
286 112 194-196 } else if (CC_EQ(s,i, "punct")) {
287 8 197-199,201 if (ISPUNCT(t_ch))
288 8 200 matched = 1;
289 104 202-204 } else if (CC_EQ(s,i, "space")) {
290 32 205-207,209 if (ISSPACE(t_ch))
291 4 208 matched = 1;
292 72 210-212 } else if (CC_EQ(s,i, "upper")) {
293 44 213-215,222 if (ISUPPER(t_ch))
294 6 216 matched = 1;
295 38 217-220 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
296 10 221 matched = 1;
297 28 223-225 } else if (CC_EQ(s,i, "xdigit")) {
298 20 226-228,230 if (ISXDIGIT(t_ch))
299 12 229 matched = 1;
300 - } else /* malformed [:class:] string */
301 8 231 return WM_ABORT_ALL;
302 220 232 p_ch = 0; /* This makes "prev_ch" get set to 0. */
303 741 233 } else if (t_ch == p_ch)
304 177 234 matched = 1;
305 1326 235 } while (prev_ch = p_ch, (p_ch = *++p) != ']');
306 518 236,237 if (matched == negated ||
307 394 237,238 ((flags & WM_PATHNAME) && t_ch == '/'))
308 134 239 return WM_NOMATCH;
309 384 240 continue;
310 - }
311 - }
312 -
313 2943 243 return *text ? WM_NOMATCH : WM_MATCH;
314 - }
315 -
316 - /* Match the "pattern" against the "text" string. */
317 344732 2 int wildmatch(const char *pattern, const char *text, unsigned int flags)
318 - {
319 344732 2 return dowild((const uchar*)pattern, (const uchar*)text, flags);
320 - }