source src/buf_text.c
| Line | Flow | Count | Block(s) | Source |
|---|---|---|---|---|
| 1 | - | /* | ||
| 2 | - | * Copyright (C) the libgit2 contributors. All rights reserved. | ||
| 3 | - | * | ||
| 4 | - | * This file is part of libgit2, distributed under the GNU GPL v2 with | ||
| 5 | - | * a Linking Exception. For full terms see the included COPYING file. | ||
| 6 | - | */ | ||
| 7 | - | #include "buf_text.h" | ||
| 8 | - | |||
| 9 | ![]() | 57 | 2 | int git_buf_text_puts_escaped( |
| 10 | - | git_buf *buf, | ||
| 11 | - | const char *string, | ||
| 12 | - | const char *esc_chars, | ||
| 13 | - | const char *esc_with) | ||
| 14 | - | { | ||
| 15 | - | const char *scan; | ||
| 16 | 57 | 2 | size_t total = 0, esc_len = strlen(esc_with), count, alloclen; | |
| 17 | - | |||
| 18 | 57 | 2 | if (!string) | |
| 19 | ##### | 3 | return 0; | |
| 20 | - | |||
| 21 | 176 | 4,6 | for (scan = string; *scan; ) { | |
| 22 | - | /* count run of non-escaped characters */ | ||
| 23 | 119 | 5 | count = strcspn(scan, esc_chars); | |
| 24 | 119 | 5 | total += count; | |
| 25 | 119 | 5 | scan += count; | |
| 26 | - | /* count run of escaped characters */ | ||
| 27 | 119 | 5 | count = strspn(scan, esc_chars); | |
| 28 | 119 | 5 | total += count * (esc_len + 1); | |
| 29 | 119 | 5 | scan += count; | |
| 30 | - | } | ||
| 31 | - | |||
| 32 | 57 | 7-13 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, total, 1); | |
| 33 | 57 | 14,15 | if (git_buf_grow_by(buf, alloclen) < 0) | |
| 34 | ##### | 16 | return -1; | |
| 35 | - | |||
| 36 | 176 | 17,21 | for (scan = string; *scan; ) { | |
| 37 | 119 | 18 | count = strcspn(scan, esc_chars); | |
| 38 | - | |||
| 39 | 119 | 18 | memmove(buf->ptr + buf->size, scan, count); | |
| 40 | 119 | 18 | scan += count; | |
| 41 | 119 | 18 | buf->size += count; | |
| 42 | - | |||
| 43 | 188 | 18-20 | for (count = strspn(scan, esc_chars); count > 0; --count) { | |
| 44 | - | /* copy escape sequence */ | ||
| 45 | 69 | 19 | memmove(buf->ptr + buf->size, esc_with, esc_len); | |
| 46 | 69 | 19 | buf->size += esc_len; | |
| 47 | - | /* copy character to be escaped */ | ||
| 48 | 69 | 19 | buf->ptr[buf->size] = *scan; | |
| 49 | 69 | 19 | buf->size++; | |
| 50 | 69 | 19 | scan++; | |
| 51 | - | } | ||
| 52 | - | } | ||
| 53 | - | |||
| 54 | 57 | 22 | buf->ptr[buf->size] = '\0'; | |
| 55 | - | |||
| 56 | 57 | 22 | return 0; | |
| 57 | - | } | ||
| 58 | - | |||
| 59 | 3082 | 2 | void git_buf_text_unescape(git_buf *buf) | |
| 60 | - | { | ||
| 61 | 3082 | 2 | buf->size = git__unescape(buf->ptr); | |
| 62 | 3082 | 3 | } | |
| 63 | - | |||
| 64 | ![]() | 606 | 2 | int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src) |
| 65 | - | { | ||
| 66 | 606 | 2 | const char *scan = src->ptr; | |
| 67 | 606 | 2 | const char *scan_end = src->ptr + src->size; | |
| 68 | 606 | 2 | const char *next = memchr(scan, '\r', src->size); | |
| 69 | - | size_t new_size; | ||
| 70 | - | char *out; | ||
| 71 | - | |||
| 72 | 606 | 2,3 | assert(tgt != src); | |
| 73 | - | |||
| 74 | 606 | 4 | if (!next) | |
| 75 | 3 | 5 | return git_buf_set(tgt, src->ptr, src->size); | |
| 76 | - | |||
| 77 | - | /* reduce reallocs while in the loop */ | ||
| 78 | 603 | 6-12 | GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1); | |
| 79 | 603 | 13,14 | if (git_buf_grow(tgt, new_size) < 0) | |
| 80 | ##### | 15 | return -1; | |
| 81 | - | |||
| 82 | 603 | 16 | out = tgt->ptr; | |
| 83 | 603 | 16 | tgt->size = 0; | |
| 84 | - | |||
| 85 | - | /* Find the next \r and copy whole chunk up to there to tgt */ | ||
| 86 | 3750 | 16,22,23 | for (; next; scan = next + 1, next = memchr(scan, '\r', scan_end - scan)) { | |
| 87 | 3147 | 17 | if (next > scan) { | |
| 88 | 3143 | 18 | size_t copylen = (size_t)(next - scan); | |
| 89 | 3143 | 18 | memcpy(out, scan, copylen); | |
| 90 | 3143 | 18 | out += copylen; | |
| 91 | - | } | ||
| 92 | - | |||
| 93 | - | /* Do not drop \r unless it is followed by \n */ | ||
| 94 | 3147 | 19,20 | if (next + 1 == scan_end || next[1] != '\n') | |
| 95 | 131 | 21 | *out++ = '\r'; | |
| 96 | - | } | ||
| 97 | - | |||
| 98 | - | /* Copy remaining input into dest */ | ||
| 99 | 603 | 24 | if (scan < scan_end) { | |
| 100 | 539 | 25 | size_t remaining = (size_t)(scan_end - scan); | |
| 101 | 539 | 25 | memcpy(out, scan, remaining); | |
| 102 | 539 | 25 | out += remaining; | |
| 103 | - | } | ||
| 104 | - | |||
| 105 | 603 | 26 | tgt->size = (size_t)(out - tgt->ptr); | |
| 106 | 603 | 26 | tgt->ptr[tgt->size] = '\0'; | |
| 107 | - | |||
| 108 | 603 | 26 | return 0; | |
| 109 | - | } | ||
| 110 | - | |||
| 111 | ![]() | 164 | 2 | int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src) |
| 112 | - | { | ||
| 113 | 164 | 2 | const char *start = src->ptr; | |
| 114 | 164 | 2 | const char *end = start + src->size; | |
| 115 | 164 | 2 | const char *scan = start; | |
| 116 | 164 | 2 | const char *next = memchr(scan, '\n', src->size); | |
| 117 | - | size_t alloclen; | ||
| 118 | - | |||
| 119 | 164 | 2,3 | assert(tgt != src); | |
| 120 | - | |||
| 121 | 164 | 4 | if (!next) | |
| 122 | 1 | 5 | return git_buf_set(tgt, src->ptr, src->size); | |
| 123 | - | |||
| 124 | - | /* attempt to reduce reallocs while in the loop */ | ||
| 125 | 163 | 6-12 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4); | |
| 126 | 163 | 13-19 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1); | |
| 127 | 163 | 20,21 | if (git_buf_grow(tgt, alloclen) < 0) | |
| 128 | ##### | 22 | return -1; | |
| 129 | 163 | 23 | tgt->size = 0; | |
| 130 | - | |||
| 131 | 1199 | 23,39,40 | for (; next; scan = next + 1, next = memchr(scan, '\n', end - scan)) { | |
| 132 | 1036 | 24 | size_t copylen = next - scan; | |
| 133 | - | |||
| 134 | - | /* if we find mixed line endings, carry on */ | ||
| 135 | 1036 | 24,25 | if (copylen && next[-1] == '\r') | |
| 136 | 109 | 26 | copylen--; | |
| 137 | - | |||
| 138 | 1036 | 27-33 | GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, copylen, 3); | |
| 139 | 1036 | 34,35 | if (git_buf_grow_by(tgt, alloclen) < 0) | |
| 140 | ##### | 36 | return -1; | |
| 141 | - | |||
| 142 | 1036 | 37 | if (copylen) { | |
| 143 | 1017 | 38 | memcpy(tgt->ptr + tgt->size, scan, copylen); | |
| 144 | 1017 | 38 | tgt->size += copylen; | |
| 145 | - | } | ||
| 146 | - | |||
| 147 | 1036 | 39 | tgt->ptr[tgt->size++] = '\r'; | |
| 148 | 1036 | 39 | tgt->ptr[tgt->size++] = '\n'; | |
| 149 | - | } | ||
| 150 | - | |||
| 151 | 163 | 41 | tgt->ptr[tgt->size] = '\0'; | |
| 152 | 163 | 41 | return git_buf_put(tgt, scan, end - scan); | |
| 153 | - | } | ||
| 154 | - | |||
| 155 | ![]() | 3505 | 2 | int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings) |
| 156 | - | { | ||
| 157 | - | size_t i; | ||
| 158 | - | const char *str, *pfx; | ||
| 159 | - | |||
| 160 | 3505 | 2 | git_buf_clear(buf); | |
| 161 | - | |||
| 162 | 3505 | 3,4 | if (!strings || !strings->count) | |
| 163 | ##### | 5 | return 0; | |
| 164 | - | |||
| 165 | - | /* initialize common prefix to first string */ | ||
| 166 | 3505 | 6,7 | if (git_buf_sets(buf, strings->strings[0]) < 0) | |
| 167 | ##### | 8 | return -1; | |
| 168 | - | |||
| 169 | - | /* go through the rest of the strings, truncating to shared prefix */ | ||
| 170 | 3529 | 9,17,18 | for (i = 1; i < strings->count; ++i) { | |
| 171 | - | |||
| 172 | 588 | 10,12 | for (str = strings->strings[i], pfx = buf->ptr; | |
| 173 | 583 | 11,13 | *str && *str == *pfx; str++, pfx++) | |
| 174 | - | /* scanning */; | ||
| 175 | - | |||
| 176 | 436 | 14 | git_buf_truncate(buf, pfx - buf->ptr); | |
| 177 | - | |||
| 178 | 436 | 15 | if (!buf->size) | |
| 179 | 412 | 16 | break; | |
| 180 | - | } | ||
| 181 | - | |||
| 182 | 3505 | 19 | return 0; | |
| 183 | - | } | ||
| 184 | - | |||
| 185 | ![]() | 618 | 2 | bool git_buf_text_is_binary(const git_buf *buf) |
| 186 | - | { | ||
| 187 | 618 | 2 | const char *scan = buf->ptr, *end = buf->ptr + buf->size; | |
| 188 | - | git_bom_t bom; | ||
| 189 | 618 | 2 | int printable = 0, nonprintable = 0; | |
| 190 | - | |||
| 191 | 618 | 2 | scan += git_buf_text_detect_bom(&bom, buf); | |
| 192 | - | |||
| 193 | 618 | 3 | if (bom > GIT_BOM_UTF8) | |
| 194 | ##### | 4 | return 1; | |
| 195 | - | |||
| 196 | 116228 | 5,17 | while (scan < end) { | |
| 197 | 115613 | 6 | unsigned char c = *scan++; | |
| 198 | - | |||
| 199 | - | /* Printable characters are those at or above SPACE (0x20), excluding DEL, | ||
| 200 | - | * and including BS, ESC and FF. | ||
| 201 | - | */ | ||
| 202 | 115613 | 6-10 | if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014') | |
| 203 | 110275 | 11 | printable++; | |
| 204 | 5338 | 12 | else if (c == '\0') | |
| 205 | 3 | 13 | return true; | |
| 206 | 5335 | 14,15 | else if (!git__isspace(c)) | |
| 207 | 1 | 16 | nonprintable++; | |
| 208 | - | } | ||
| 209 | - | |||
| 210 | 615 | 18 | return ((printable >> 7) < nonprintable); | |
| 211 | - | } | ||
| 212 | - | |||
| 213 | 2094 | 2 | bool git_buf_text_contains_nul(const git_buf *buf) | |
| 214 | - | { | ||
| 215 | 2094 | 2 | return (memchr(buf->ptr, '\0', buf->size) != NULL); | |
| 216 | - | } | ||
| 217 | - | |||
| 218 | ![]() | 201220 | 2 | int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf) |
| 219 | - | { | ||
| 220 | - | const char *ptr; | ||
| 221 | - | size_t len; | ||
| 222 | - | |||
| 223 | 201220 | 2 | *bom = GIT_BOM_NONE; | |
| 224 | - | /* need at least 2 bytes to look for any BOM */ | ||
| 225 | 201220 | 2 | if (buf->size < 2) | |
| 226 | 174833 | 3 | return 0; | |
| 227 | - | |||
| 228 | 26387 | 4 | ptr = buf->ptr; | |
| 229 | 26387 | 4 | len = buf->size; | |
| 230 | - | |||
| 231 | 26387 | 4 | switch (*ptr++) { | |
| 232 | - | case 0: | ||
| 233 | 1 | 5-8 | if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') { | |
| 234 | ##### | 9 | *bom = GIT_BOM_UTF32_BE; | |
| 235 | ##### | 9 | return 4; | |
| 236 | - | } | ||
| 237 | 1 | 10 | break; | |
| 238 | - | case '\xEF': | ||
| 239 | 407 | 11-13 | if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') { | |
| 240 | 407 | 14 | *bom = GIT_BOM_UTF8; | |
| 241 | 407 | 14 | return 3; | |
| 242 | - | } | ||
| 243 | ##### | 15 | break; | |
| 244 | - | case '\xFE': | ||
| 245 | 3 | 16 | if (*ptr == '\xFF') { | |
| 246 | 3 | 17 | *bom = GIT_BOM_UTF16_BE; | |
| 247 | 3 | 17 | return 2; | |
| 248 | - | } | ||
| 249 | ##### | 18 | break; | |
| 250 | - | case '\xFF': | ||
| 251 | 2 | 19 | if (*ptr != '\xFE') | |
| 252 | 1 | 20 | break; | |
| 253 | 1 | 21-23 | if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) { | |
| 254 | ##### | 24 | *bom = GIT_BOM_UTF32_LE; | |
| 255 | ##### | 24 | return 4; | |
| 256 | - | } else { | ||
| 257 | 1 | 25 | *bom = GIT_BOM_UTF16_LE; | |
| 258 | 1 | 25 | return 2; | |
| 259 | - | } | ||
| 260 | - | break; | ||
| 261 | - | default: | ||
| 262 | 25974 | 26 | break; | |
| 263 | - | } | ||
| 264 | - | |||
| 265 | 25976 | 27 | return 0; | |
| 266 | - | } | ||
| 267 | - | |||
| 268 | ![]() | 2084 | 2 | bool git_buf_text_gather_stats( |
| 269 | - | git_buf_text_stats *stats, const git_buf *buf, bool skip_bom) | ||
| 270 | - | { | ||
| 271 | 2084 | 2 | const char *scan = buf->ptr, *end = buf->ptr + buf->size; | |
| 272 | - | int skip; | ||
| 273 | - | |||
| 274 | 2084 | 2 | memset(stats, 0, sizeof(*stats)); | |
| 275 | - | |||
| 276 | - | /* BOM detection */ | ||
| 277 | 2084 | 2 | skip = git_buf_text_detect_bom(&stats->bom, buf); | |
| 278 | 2084 | 3 | if (skip_bom) | |
| 279 | ##### | 4 | scan += skip; | |
| 280 | - | |||
| 281 | - | /* Ignore EOF character */ | ||
| 282 | 2084 | 5,6 | if (buf->size > 0 && end[-1] == '\032') | |
| 283 | ##### | 7 | end--; | |
| 284 | - | |||
| 285 | - | /* Counting loop */ | ||
| 286 | 199241 | 8,21 | while (scan < end) { | |
| 287 | 197157 | 9 | unsigned char c = *scan++; | |
| 288 | - | |||
| 289 | 197157 | 9,10 | if (c > 0x1F && c != 0x7F) | |
| 290 | 175434 | 11 | stats->printable++; | |
| 291 | 21723 | 12 | else switch (c) { | |
| 292 | - | case '\0': | ||
| 293 | 425 | 13 | stats->nul++; | |
| 294 | 425 | 13 | stats->nonprintable++; | |
| 295 | 425 | 13 | break; | |
| 296 | - | case '\n': | ||
| 297 | 12362 | 14 | stats->lf++; | |
| 298 | 12362 | 14 | break; | |
| 299 | - | case '\r': | ||
| 300 | 5956 | 15 | stats->cr++; | |
| 301 | 5956 | 15,16 | if (scan < end && *scan == '\n') | |
| 302 | 5052 | 17 | stats->crlf++; | |
| 303 | 5956 | 18 | break; | |
| 304 | - | case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/ | ||
| 305 | 431 | 19 | stats->printable++; | |
| 306 | 431 | 19 | break; | |
| 307 | - | default: | ||
| 308 | 2549 | 20 | stats->nonprintable++; | |
| 309 | 2549 | 20 | break; | |
| 310 | - | } | ||
| 311 | - | } | ||
| 312 | - | |||
| 313 | - | /* Treat files with a bare CR as binary */ | ||
| 314 | 2084 | 22-24 | return (stats->cr != stats->crlf || stats->nul > 0 || | |
| 315 | 1677 | 24 | ((stats->printable >> 7) < stats->nonprintable)); | |
| 316 | - | } |