source src/xdiff/xprepare.c
| Line | Flow | Count | Block(s) | Source |
|---|---|---|---|---|
| 1 | - | /* | ||
| 2 | - | * LibXDiff by Davide Libenzi ( File Differential Library ) | ||
| 3 | - | * Copyright (C) 2003 Davide Libenzi | ||
| 4 | - | * | ||
| 5 | - | * This library is free software; you can redistribute it and/or | ||
| 6 | - | * modify it under the terms of the GNU Lesser General Public | ||
| 7 | - | * License as published by the Free Software Foundation; either | ||
| 8 | - | * version 2.1 of the License, or (at your option) any later version. | ||
| 9 | - | * | ||
| 10 | - | * This library is distributed in the hope that it will be useful, | ||
| 11 | - | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | - | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 13 | - | * Lesser General Public License for more details. | ||
| 14 | - | * | ||
| 15 | - | * You should have received a copy of the GNU Lesser General Public | ||
| 16 | - | * License along with this library; if not, see | ||
| 17 | - | * <http://www.gnu.org/licenses/>. | ||
| 18 | - | * | ||
| 19 | - | * Davide Libenzi <davidel@xmailserver.org> | ||
| 20 | - | * | ||
| 21 | - | */ | ||
| 22 | - | |||
| 23 | - | #include "xinclude.h" | ||
| 24 | - | |||
| 25 | - | |||
| 26 | - | #define XDL_KPDIS_RUN 4 | ||
| 27 | - | #define XDL_MAX_EQLIMIT 1024 | ||
| 28 | - | #define XDL_SIMSCAN_WINDOW 100 | ||
| 29 | - | #define XDL_GUESS_NLINES1 256 | ||
| 30 | - | #define XDL_GUESS_NLINES2 20 | ||
| 31 | - | |||
| 32 | - | |||
| 33 | - | typedef struct s_xdlclass { | ||
| 34 | - | struct s_xdlclass *next; | ||
| 35 | - | unsigned long ha; | ||
| 36 | - | char const *line; | ||
| 37 | - | long size; | ||
| 38 | - | long idx; | ||
| 39 | - | long len1, len2; | ||
| 40 | - | } xdlclass_t; | ||
| 41 | - | |||
| 42 | - | typedef struct s_xdlclassifier { | ||
| 43 | - | unsigned int hbits; | ||
| 44 | - | long hsize; | ||
| 45 | - | xdlclass_t **rchash; | ||
| 46 | - | chastore_t ncha; | ||
| 47 | - | xdlclass_t **rcrecs; | ||
| 48 | - | long alloc; | ||
| 49 | - | long count; | ||
| 50 | - | long flags; | ||
| 51 | - | } xdlclassifier_t; | ||
| 52 | - | |||
| 53 | - | |||
| 54 | - | |||
| 55 | - | |||
| 56 | - | static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags); | ||
| 57 | - | static void xdl_free_classifier(xdlclassifier_t *cf); | ||
| 58 | - | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | ||
| 59 | - | unsigned int hbits, xrecord_t *rec); | ||
| 60 | - | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, | ||
| 61 | - | xdlclassifier_t *cf, xdfile_t *xdf); | ||
| 62 | - | static void xdl_free_ctx(xdfile_t *xdf); | ||
| 63 | - | static int xdl_clean_mmatch(char const *dis, long i, long s, long e); | ||
| 64 | - | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | ||
| 65 | - | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); | ||
| 66 | - | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | ||
| 67 | - | |||
| 68 | - | |||
| 69 | - | |||
| 70 | - | |||
| 71 | 2123 | 2 | static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { | |
| 72 | 2123 | 2 | cf->flags = flags; | |
| 73 | - | |||
| 74 | 2123 | 2 | cf->hbits = xdl_hashbits((unsigned int) size); | |
| 75 | 2123 | 3 | cf->hsize = 1 << cf->hbits; | |
| 76 | - | |||
| 77 | 2123 | 3,4 | if (xdl_cha_init(&cf->ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) { | |
| 78 | - | |||
| 79 | ##### | 5 | return -1; | |
| 80 | - | } | ||
| 81 | 2123 | 6,7 | if (!(cf->rchash = (xdlclass_t **) xdl_malloc(cf->hsize * sizeof(xdlclass_t *)))) { | |
| 82 | - | |||
| 83 | ##### | 8 | xdl_cha_free(&cf->ncha); | |
| 84 | ##### | 9 | return -1; | |
| 85 | - | } | ||
| 86 | 2123 | 10 | memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); | |
| 87 | - | |||
| 88 | 2123 | 10 | cf->alloc = size; | |
| 89 | 2123 | 10,11 | if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) { | |
| 90 | - | |||
| 91 | ##### | 12 | xdl_free(cf->rchash); | |
| 92 | ##### | 13 | xdl_cha_free(&cf->ncha); | |
| 93 | ##### | 14 | return -1; | |
| 94 | - | } | ||
| 95 | - | |||
| 96 | 2123 | 15 | cf->count = 0; | |
| 97 | - | |||
| 98 | 2123 | 15 | return 0; | |
| 99 | - | } | ||
| 100 | - | |||
| 101 | - | |||
| 102 | 2123 | 2 | static void xdl_free_classifier(xdlclassifier_t *cf) { | |
| 103 | - | |||
| 104 | 2123 | 2 | xdl_free(cf->rcrecs); | |
| 105 | 2123 | 3 | xdl_free(cf->rchash); | |
| 106 | 2123 | 4 | xdl_cha_free(&cf->ncha); | |
| 107 | 2123 | 5 | } | |
| 108 | - | |||
| 109 | - | |||
| 110 | ![]() |
172286 | 2 | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, |
| 111 | - | unsigned int hbits, xrecord_t *rec) { | ||
| 112 | - | long hi; | ||
| 113 | - | char const *line; | ||
| 114 | - | xdlclass_t *rcrec; | ||
| 115 | - | xdlclass_t **rcrecs; | ||
| 116 | - | |||
| 117 | 172286 | 2 | line = rec->ptr; | |
| 118 | 172286 | 2 | hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); | |
| 119 | 177476 | 2,7,8 | for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) | |
| 120 | 159548 | 3,5 | if (rcrec->ha == rec->ha && | |
| 121 | 154395 | 4 | xdl_recmatch(rcrec->line, rcrec->size, | |
| 122 | - | rec->ptr, rec->size, cf->flags)) | ||
| 123 | 154358 | 6 | break; | |
| 124 | - | |||
| 125 | 172286 | 9 | if (!rcrec) { | |
| 126 | 17928 | 10,11 | if (!(rcrec = xdl_cha_alloc(&cf->ncha))) { | |
| 127 | - | |||
| 128 | ##### | 12 | return -1; | |
| 129 | - | } | ||
| 130 | 17928 | 13 | rcrec->idx = cf->count++; | |
| 131 | 17928 | 13 | if (cf->count > cf->alloc) { | |
| 132 | ##### | 14 | cf->alloc *= 2; | |
| 133 | ##### | 14,15 | if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) { | |
| 134 | - | |||
| 135 | ##### | 16 | return -1; | |
| 136 | - | } | ||
| 137 | ##### | 17 | cf->rcrecs = rcrecs; | |
| 138 | - | } | ||
| 139 | 17928 | 18 | cf->rcrecs[rcrec->idx] = rcrec; | |
| 140 | 17928 | 18 | rcrec->line = line; | |
| 141 | 17928 | 18 | rcrec->size = rec->size; | |
| 142 | 17928 | 18 | rcrec->ha = rec->ha; | |
| 143 | 17928 | 18 | rcrec->len1 = rcrec->len2 = 0; | |
| 144 | 17928 | 18 | rcrec->next = cf->rchash[hi]; | |
| 145 | 17928 | 18 | cf->rchash[hi] = rcrec; | |
| 146 | - | } | ||
| 147 | - | |||
| 148 | 172286 | 19-21 | (pass == 1) ? rcrec->len1++ : rcrec->len2++; | |
| 149 | - | |||
| 150 | 172286 | 22 | rec->ha = (unsigned long) rcrec->idx; | |
| 151 | - | |||
| 152 | 172286 | 22 | hi = (long) XDL_HASHLONG(rec->ha, hbits); | |
| 153 | 172286 | 22 | rec->next = rhash[hi]; | |
| 154 | 172286 | 22 | rhash[hi] = rec; | |
| 155 | - | |||
| 156 | 172286 | 22 | return 0; | |
| 157 | - | } | ||
| 158 | - | |||
| 159 | - | |||
| 160 | ![]() |
4246 | 2 | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, |
| 161 | - | xdlclassifier_t *cf, xdfile_t *xdf) { | ||
| 162 | - | unsigned int hbits; | ||
| 163 | - | long nrec, hsize, bsize; | ||
| 164 | - | unsigned long hav; | ||
| 165 | - | char const *blk, *cur, *top, *prev; | ||
| 166 | - | xrecord_t *crec; | ||
| 167 | - | xrecord_t **recs, **rrecs; | ||
| 168 | - | xrecord_t **rhash; | ||
| 169 | - | unsigned long *ha; | ||
| 170 | - | char *rchg; | ||
| 171 | - | long *rindex; | ||
| 172 | - | |||
| 173 | 4246 | 2 | ha = NULL; | |
| 174 | 4246 | 2 | rindex = NULL; | |
| 175 | 4246 | 2 | rchg = NULL; | |
| 176 | 4246 | 2 | rhash = NULL; | |
| 177 | 4246 | 2 | recs = NULL; | |
| 178 | - | |||
| 179 | 4246 | 2,3 | if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) | |
| 180 | ##### | 4 | goto abort; | |
| 181 | 4246 | 5,6 | if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) | |
| 182 | ##### | 7 | goto abort; | |
| 183 | - | |||
| 184 | 4246 | 8 | if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) | |
| 185 | ##### | 9 | hbits = hsize = 0; | |
| 186 | - | else { | ||
| 187 | 4246 | 10 | hbits = xdl_hashbits((unsigned int) narec); | |
| 188 | 4246 | 11 | hsize = 1 << hbits; | |
| 189 | 4246 | 11,12 | if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) | |
| 190 | ##### | 13 | goto abort; | |
| 191 | 4246 | 14 | memset(rhash, 0, hsize * sizeof(xrecord_t *)); | |
| 192 | - | } | ||
| 193 | - | |||
| 194 | 4246 | 15 | nrec = 0; | |
| 195 | 4246 | 15,16 | if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { | |
| 196 | 176471 | 17,31 | for (top = blk + bsize; cur < top; ) { | |
| 197 | 172286 | 18 | prev = cur; | |
| 198 | 172286 | 18 | hav = xdl_hash_record(&cur, top, xpp->flags); | |
| 199 | 172286 | 19 | if (nrec >= narec) { | |
| 200 | ##### | 20 | narec *= 2; | |
| 201 | ##### | 20,21 | if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) | |
| 202 | ##### | 22 | goto abort; | |
| 203 | ##### | 23 | recs = rrecs; | |
| 204 | - | } | ||
| 205 | 172286 | 24,25 | if (!(crec = xdl_cha_alloc(&xdf->rcha))) | |
| 206 | ##### | 26 | goto abort; | |
| 207 | 172286 | 27 | crec->ptr = prev; | |
| 208 | 172286 | 27 | crec->size = (long) (cur - prev); | |
| 209 | 172286 | 27 | crec->ha = hav; | |
| 210 | 172286 | 27 | recs[nrec++] = crec; | |
| 211 | - | |||
| 212 | 172286 | 27,29 | if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && | |
| 213 | 172286 | 28 | xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) | |
| 214 | ##### | 30 | goto abort; | |
| 215 | - | } | ||
| 216 | - | } | ||
| 217 | - | |||
| 218 | 4246 | 32,33 | if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) | |
| 219 | ##### | 34 | goto abort; | |
| 220 | 4246 | 35 | memset(rchg, 0, (nrec + 2) * sizeof(char)); | |
| 221 | - | |||
| 222 | 4246 | 35,36 | if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) | |
| 223 | ##### | 37 | goto abort; | |
| 224 | 4246 | 38,39 | if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) | |
| 225 | ##### | 40 | goto abort; | |
| 226 | - | |||
| 227 | 4246 | 41 | xdf->nrec = nrec; | |
| 228 | 4246 | 41 | xdf->recs = recs; | |
| 229 | 4246 | 41 | xdf->hbits = hbits; | |
| 230 | 4246 | 41 | xdf->rhash = rhash; | |
| 231 | 4246 | 41 | xdf->rchg = rchg + 1; | |
| 232 | 4246 | 41 | xdf->rindex = rindex; | |
| 233 | 4246 | 41 | xdf->nreff = 0; | |
| 234 | 4246 | 41 | xdf->ha = ha; | |
| 235 | 4246 | 41 | xdf->dstart = 0; | |
| 236 | 4246 | 41 | xdf->dend = nrec - 1; | |
| 237 | - | |||
| 238 | 4246 | 41 | return 0; | |
| 239 | - | |||
| 240 | - | abort: | ||
| 241 | ##### | 42 | xdl_free(ha); | |
| 242 | ##### | 43 | xdl_free(rindex); | |
| 243 | ##### | 44 | xdl_free(rchg); | |
| 244 | ##### | 45 | xdl_free(rhash); | |
| 245 | ##### | 46 | xdl_free(recs); | |
| 246 | ##### | 47 | xdl_cha_free(&xdf->rcha); | |
| 247 | ##### | 48 | return -1; | |
| 248 | - | } | ||
| 249 | - | |||
| 250 | - | |||
| 251 | 4246 | 2 | static void xdl_free_ctx(xdfile_t *xdf) { | |
| 252 | - | |||
| 253 | 4246 | 2 | xdl_free(xdf->rhash); | |
| 254 | 4246 | 3 | xdl_free(xdf->rindex); | |
| 255 | 4246 | 4 | xdl_free(xdf->rchg - 1); | |
| 256 | 4246 | 5 | xdl_free(xdf->ha); | |
| 257 | 4246 | 6 | xdl_free(xdf->recs); | |
| 258 | 4246 | 7 | xdl_cha_free(&xdf->rcha); | |
| 259 | 4246 | 8 | } | |
| 260 | - | |||
| 261 | - | |||
| 262 | ![]() |
2123 | 2 | int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, |
| 263 | - | xdfenv_t *xe) { | ||
| 264 | - | long enl1, enl2, sample; | ||
| 265 | - | xdlclassifier_t cf; | ||
| 266 | - | |||
| 267 | 2123 | 2 | memset(&cf, 0, sizeof(cf)); | |
| 268 | - | |||
| 269 | - | /* | ||
| 270 | - | * For histogram diff, we can afford a smaller sample size and | ||
| 271 | - | * thus a poorer estimate of the number of lines, as the hash | ||
| 272 | - | * table (rhash) won't be filled up/grown. The number of lines | ||
| 273 | - | * (nrecs) will be updated correctly anyway by | ||
| 274 | - | * xdl_prepare_ctx(). | ||
| 275 | - | */ | ||
| 276 | 2123 | 2-4 | sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF | |
| 277 | - | ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1); | ||
| 278 | - | |||
| 279 | 2123 | 5 | enl1 = xdl_guess_lines(mf1, sample) + 1; | |
| 280 | 2123 | 6 | enl2 = xdl_guess_lines(mf2, sample) + 1; | |
| 281 | - | |||
| 282 | 2123 | 7,9 | if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF && | |
| 283 | 2123 | 8 | xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) | |
| 284 | ##### | 10 | return -1; | |
| 285 | - | |||
| 286 | 2123 | 11,12 | if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { | |
| 287 | - | |||
| 288 | ##### | 13 | xdl_free_classifier(&cf); | |
| 289 | ##### | 14 | return -1; | |
| 290 | - | } | ||
| 291 | 2123 | 15,16 | if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { | |
| 292 | - | |||
| 293 | ##### | 17 | xdl_free_ctx(&xe->xdf1); | |
| 294 | ##### | 18 | xdl_free_classifier(&cf); | |
| 295 | ##### | 19 | return -1; | |
| 296 | - | } | ||
| 297 | - | |||
| 298 | 2123 | 20,21 | if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && | |
| 299 | 2122 | 21,23 | (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && | |
| 300 | 2122 | 22 | xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) { | |
| 301 | - | |||
| 302 | ##### | 24 | xdl_free_ctx(&xe->xdf2); | |
| 303 | ##### | 25 | xdl_free_ctx(&xe->xdf1); | |
| 304 | ##### | 26 | xdl_free_classifier(&cf); | |
| 305 | ##### | 27 | return -1; | |
| 306 | - | } | ||
| 307 | - | |||
| 308 | 2123 | 28 | if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) | |
| 309 | 2123 | 29 | xdl_free_classifier(&cf); | |
| 310 | - | |||
| 311 | 2123 | 30 | return 0; | |
| 312 | - | } | ||
| 313 | - | |||
| 314 | - | |||
| 315 | 2123 | 2 | void xdl_free_env(xdfenv_t *xe) { | |
| 316 | - | |||
| 317 | 2123 | 2 | xdl_free_ctx(&xe->xdf2); | |
| 318 | 2123 | 3 | xdl_free_ctx(&xe->xdf1); | |
| 319 | 2123 | 4 | } | |
| 320 | - | |||
| 321 | - | |||
| 322 | ![]() |
1806 | 2 | static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { |
| 323 | - | long r, rdis0, rpdis0, rdis1, rpdis1; | ||
| 324 | - | |||
| 325 | - | /* | ||
| 326 | - | * Limits the window the is examined during the similar-lines | ||
| 327 | - | * scan. The loops below stops when dis[i - r] == 1 (line that | ||
| 328 | - | * has no match), but there are corner cases where the loop | ||
| 329 | - | * proceed all the way to the extremities by causing huge | ||
| 330 | - | * performance penalties in case of big files. | ||
| 331 | - | */ | ||
| 332 | 1806 | 2 | if (i - s > XDL_SIMSCAN_WINDOW) | |
| 333 | 2 | 3 | s = i - XDL_SIMSCAN_WINDOW; | |
| 334 | 1806 | 4 | if (e - i > XDL_SIMSCAN_WINDOW) | |
| 335 | ##### | 5 | e = i + XDL_SIMSCAN_WINDOW; | |
| 336 | - | |||
| 337 | - | /* | ||
| 338 | - | * Scans the lines before 'i' to find a run of lines that either | ||
| 339 | - | * have no match (dis[j] == 0) or have multiple matches (dis[j] > 1). | ||
| 340 | - | * Note that we always call this function with dis[i] > 1, so the | ||
| 341 | - | * current line (i) is already a multimatch line. | ||
| 342 | - | */ | ||
| 343 | 8179 | 6,12,13 | for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) { | |
| 344 | 6910 | 7 | if (!dis[i - r]) | |
| 345 | 1289 | 8 | rdis0++; | |
| 346 | 5621 | 9 | else if (dis[i - r] == 2) | |
| 347 | 5084 | 10 | rpdis0++; | |
| 348 | - | else | ||
| 349 | 537 | 11 | break; | |
| 350 | - | } | ||
| 351 | - | /* | ||
| 352 | - | * If the run before the line 'i' found only multimatch lines, we | ||
| 353 | - | * return 0 and hence we don't make the current line (i) discarded. | ||
| 354 | - | * We want to discard multimatch lines only when they appear in the | ||
| 355 | - | * middle of runs with nomatch lines (dis[j] == 0). | ||
| 356 | - | */ | ||
| 357 | 1806 | 14 | if (rdis0 == 0) | |
| 358 | 1290 | 15 | return 0; | |
| 359 | 3931 | 16,22,23 | for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) { | |
| 360 | 3465 | 17 | if (!dis[i + r]) | |
| 361 | 1374 | 18 | rdis1++; | |
| 362 | 2091 | 19 | else if (dis[i + r] == 2) | |
| 363 | 2041 | 20 | rpdis1++; | |
| 364 | - | else | ||
| 365 | 50 | 21 | break; | |
| 366 | - | } | ||
| 367 | - | /* | ||
| 368 | - | * If the run after the line 'i' found only multimatch lines, we | ||
| 369 | - | * return 0 and hence we don't make the current line (i) discarded. | ||
| 370 | - | */ | ||
| 371 | 516 | 24 | if (rdis1 == 0) | |
| 372 | 109 | 25 | return 0; | |
| 373 | 407 | 26 | rdis1 += rdis0; | |
| 374 | 407 | 26 | rpdis1 += rpdis0; | |
| 375 | - | |||
| 376 | 407 | 26 | return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1); | |
| 377 | - | } | ||
| 378 | - | |||
| 379 | - | |||
| 380 | - | /* | ||
| 381 | - | * Try to reduce the problem complexity, discard records that have no | ||
| 382 | - | * matches on the other file. Also, lines that have multiple matches | ||
| 383 | - | * might be potentially discarded if they happear in a run of discardable. | ||
| 384 | - | */ | ||
| 385 | ![]() |
2122 | 2 | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { |
| 386 | - | long i, nm, nreff, mlim; | ||
| 387 | - | xrecord_t **recs; | ||
| 388 | - | xdlclass_t *rcrec; | ||
| 389 | - | char *dis, *dis1, *dis2; | ||
| 390 | - | |||
| 391 | 2122 | 2,3 | if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { | |
| 392 | - | |||
| 393 | ##### | 4 | return -1; | |
| 394 | - | } | ||
| 395 | 2122 | 5 | memset(dis, 0, xdf1->nrec + xdf2->nrec + 2); | |
| 396 | 2122 | 5 | dis1 = dis; | |
| 397 | 2122 | 5 | dis2 = dis1 + xdf1->nrec + 1; | |
| 398 | - | |||
| 399 | 2122 | 5,6 | if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) | |
| 400 | ##### | 7 | mlim = XDL_MAX_EQLIMIT; | |
| 401 | 9950 | 8,18,19 | for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { | |
| 402 | 7828 | 9 | rcrec = cf->rcrecs[(*recs)->ha]; | |
| 403 | 7828 | 9-11 | nm = rcrec ? rcrec->len2 : 0; | |
| 404 | 7828 | 12-17 | dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; | |
| 405 | - | } | ||
| 406 | - | |||
| 407 | 2122 | 20,21 | if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) | |
| 408 | ##### | 22 | mlim = XDL_MAX_EQLIMIT; | |
| 409 | 10799 | 23,33,34 | for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { | |
| 410 | 8677 | 24 | rcrec = cf->rcrecs[(*recs)->ha]; | |
| 411 | 8677 | 24-26 | nm = rcrec ? rcrec->len1 : 0; | |
| 412 | 8677 | 27-32 | dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; | |
| 413 | - | } | ||
| 414 | - | |||
| 415 | 9950 | 35,43 | for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; | |
| 416 | 7828 | 42 | i <= xdf1->dend; i++, recs++) { | |
| 417 | 7828 | 36,37 | if (dis1[i] == 1 || | |
| 418 | 5687 | 37-39 | (dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) { | |
| 419 | 3129 | 40 | xdf1->rindex[nreff] = i; | |
| 420 | 3129 | 40 | xdf1->ha[nreff] = (*recs)->ha; | |
| 421 | 3129 | 40 | nreff++; | |
| 422 | - | } else | ||
| 423 | 4699 | 41 | xdf1->rchg[i] = 1; | |
| 424 | - | } | ||
| 425 | 2122 | 44 | xdf1->nreff = nreff; | |
| 426 | - | |||
| 427 | 10799 | 44,52 | for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; | |
| 428 | 8677 | 51 | i <= xdf2->dend; i++, recs++) { | |
| 429 | 8677 | 45,46 | if (dis2[i] == 1 || | |
| 430 | 6391 | 46-48 | (dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) { | |
| 431 | 3067 | 49 | xdf2->rindex[nreff] = i; | |
| 432 | 3067 | 49 | xdf2->ha[nreff] = (*recs)->ha; | |
| 433 | 3067 | 49 | nreff++; | |
| 434 | - | } else | ||
| 435 | 5610 | 50 | xdf2->rchg[i] = 1; | |
| 436 | - | } | ||
| 437 | 2122 | 53 | xdf2->nreff = nreff; | |
| 438 | - | |||
| 439 | 2122 | 53 | xdl_free(dis); | |
| 440 | - | |||
| 441 | 2122 | 54 | return 0; | |
| 442 | - | } | ||
| 443 | - | |||
| 444 | - | |||
| 445 | - | /* | ||
| 446 | - | * Early trim initial and terminal matching records. | ||
| 447 | - | */ | ||
| 448 | ![]() |
2122 | 2 | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { |
| 449 | - | long i, lim; | ||
| 450 | - | xrecord_t **recs1, **recs2; | ||
| 451 | - | |||
| 452 | 2122 | 2 | recs1 = xdf1->recs; | |
| 453 | 2122 | 2 | recs2 = xdf2->recs; | |
| 454 | 75707 | 2,6 | for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; | |
| 455 | 73585 | 5 | i++, recs1++, recs2++) | |
| 456 | 75000 | 3 | if ((*recs1)->ha != (*recs2)->ha) | |
| 457 | 1415 | 4 | break; | |
| 458 | - | |||
| 459 | 2122 | 7 | xdf1->dstart = xdf2->dstart = i; | |
| 460 | - | |||
| 461 | 2122 | 7 | recs1 = xdf1->recs + xdf1->nrec - 1; | |
| 462 | 2122 | 7 | recs2 = xdf2->recs + xdf2->nrec - 1; | |
| 463 | 6419 | 7,10,11 | for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) | |
| 464 | 5582 | 8 | if ((*recs1)->ha != (*recs2)->ha) | |
| 465 | 1285 | 9 | break; | |
| 466 | - | |||
| 467 | 2122 | 12 | xdf1->dend = xdf1->nrec - i - 1; | |
| 468 | 2122 | 12 | xdf2->dend = xdf2->nrec - i - 1; | |
| 469 | - | |||
| 470 | 2122 | 12 | return 0; | |
| 471 | - | } | ||
| 472 | - | |||
| 473 | - | |||
| 474 | ![]() |
2122 | 2 | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { |
| 475 | - | |||
| 476 | 2122 | 2,3,5 | if (xdl_trim_ends(xdf1, xdf2) < 0 || | |
| 477 | 2122 | 4 | xdl_cleanup_records(cf, xdf1, xdf2) < 0) { | |
| 478 | - | |||
| 479 | ##### | 6 | return -1; | |
| 480 | - | } | ||
| 481 | - | |||
| 482 | 2122 | 7 | return 0; | |
| 483 | - | } |