00001 #ifndef LUX_INDEX_POSTING_H
00002 #define LUX_INDEX_POSTING_H
00003
00004 #include <vector>
00005 #include <deque>
00006 #include <iostream>
00007 #include <stdint.h>
00008
00009
00010 #include <boost/pool/pool_alloc.hpp>
00011
00012 namespace Lux {
00013
00014
00015
00016
00017 typedef std::deque<uint32_t> pos_list_t;
00018 typedef pos_list_t::iterator pos_list_itr;
00019
00020 struct Posting {
00021 Posting(void)
00022 {}
00023 Posting(pos_list_t _list, bool _skipped)
00024 : list(_list), skipped(_skipped)
00025 {}
00026
00027 pos_list_t list;
00028 bool skipped;
00029 };
00030
00031 struct PostingInf {
00032 PostingInf(uint32_t size_, uint32_t idx_)
00033 : size(size_), idx(idx_)
00034 {}
00035 uint32_t size;
00036 uint32_t idx;
00037 };
00038 typedef std::vector<PostingInf> PostingInfs;
00039 typedef PostingInfs::iterator PostingInfsItr;
00040
00041 struct sort_by_size : public std::binary_function<PostingInf, PostingInf, bool> {
00042 bool operator()(const PostingInf &t1, const PostingInf &t2) {
00043 return t1.size < t2.size;
00044 }
00045 };
00046
00052
00053 static inline
00054 int32_t next(uint8_t *b, int &i, int size)
00055 {
00056 uint32_t n = 0;
00057 for (; i < size; ++i) {
00058 n = 128 * n + b[i];
00059 if (b[i] >= 128) {
00060 n -= 128;
00061 ++i;
00062 return n;
00063 }
00064 }
00065 return -1;
00066 }
00067
00068 static inline
00069 int32_t next(uint8_t *b, int &i, int size, uint32_t &prev)
00070 {
00071 uint32_t dp = next(b, i, size);
00072 if (dp > 0) {
00073 dp += prev;
00074 } else if (dp == 0) {
00075 do {
00076 dp = next(b, i, size);
00077 } while (dp == 0);
00078 }
00079 prev = dp;
00080 return dp;
00081 }
00082
00083 static inline
00084 void skip(uint8_t *b, int &i, int size, int skip)
00085 {
00086 for (; i < size; ++i) {
00087 if (b[i] >= 128) {
00088 --skip;
00089 if (skip == 0) {
00090 ++i;
00091 return;
00092 }
00093 }
00094 }
00095 }
00096
00097
00098 static inline
00099 int32_t next(pos_list_itr *itr, pos_list_itr itr_end)
00100 {
00101 if (*itr != itr_end) {
00102 *itr += *(++*itr) + 1;
00103 if (*itr != itr_end) {
00104 return **itr;
00105 }
00106 }
00107 return -1;
00108 }
00109
00110
00111 static inline
00112 int32_t next(pos_list_itr *itr, int *n) {
00113 --*n;
00114 if (*n >= 0) {
00115 return *(++*itr);
00116 }
00117 return -1;
00118 }
00119
00120
00121 static inline pos_list_t
00122 _intersect_nodiff(uint8_t *bp, int &ip, int sp,
00123 uint8_t *bn, int &in, int sn, int len)
00124 {
00125 pos_list_t pl;
00126
00127
00128 int np = next(bp, ip, sp) - 1;
00129 int nn = next(bn, in, sn) - 1;
00130 int pp = next(bp, ip, sp);
00131 int pn = next(bn, in, sn);
00132
00133 while (1) {
00134 if (pp + len == pn) {
00135 pl.push_back(pp);
00136 if (np > 0) { pp = next(bp, ip, sp); }
00137 if (nn > 0) { pn = next(bn, in, sn); }
00138 --nn;
00139 --np;
00140 } else if (pp + len > pn) {
00141 if (nn > 0) { pn = next(bn, in, sn); }
00142 --nn;
00143 } else {
00144 if (np > 0) { pp = next(bp, ip, sp); }
00145 --np;
00146 }
00147
00148 if (np < 0 && nn < 0) break;
00149 if (np < 0 && pn > pp + len) {
00150 if (nn > 0) { skip(bn, in, sn, nn); }
00151 break;
00152 }
00153 if (nn < 0 && pp + len > pn) {
00154 if (np > 0) { skip(bp, ip, sp, np); }
00155 break;
00156 }
00157 }
00158 return pl;
00159 }
00160
00161
00162 static inline pos_list_t
00163 _intersect(uint8_t *bp, int &ip, int sp,
00164 uint8_t *bn, int &in, int sn, int len)
00165 {
00166 pos_list_t pl;
00167
00168
00169 int np = next(bp, ip, sp) - 1;
00170 int nn = next(bn, in, sn) - 1;
00171 int pp = next(bp, ip, sp);
00172 int pn = next(bn, in, sn);
00173
00174 while (1) {
00175 if (pp + len == pn) {
00176 pl.push_back(pp);
00177 if (np > 0) { pp += next(bp, ip, sp); }
00178 if (nn > 0) { pn += next(bn, in, sn); }
00179 --nn;
00180 --np;
00181 } else if (pp + len > pn) {
00182 if (nn > 0) { pn += next(bn, in, sn); }
00183 --nn;
00184 } else {
00185 if (np > 0) { pp += next(bp, ip, sp); }
00186 --np;
00187 }
00188
00189 if (np < 0 && nn < 0) break;
00190 if (np < 0 && pn > pp + len) {
00191 if (nn > 0) { skip(bn, in, sn, nn); }
00192 break;
00193 }
00194 if (nn < 0 && pp + len > pn) {
00195 if (np > 0) { skip(bp, ip, sp, np); }
00196 break;
00197 }
00198 }
00199 return pl;
00200 }
00201
00202
00203 static inline
00204 pos_list_t intersect_nodiff(uint8_t *bp, uint32_t sp,
00205 uint8_t *bn, uint32_t sn, int len)
00206 {
00207 pos_list_t pl;
00208 if (sp == 0 || sn == 0) { return pl; }
00209
00210
00211 int ip = 0, in = 0;
00212 int dp = next(bp, ip, sp);
00213 int dn = next(bn, in, sn);
00214
00215 while (1) {
00216 if (dp == dn) {
00217 pos_list_t tp = _intersect_nodiff(bp, ip , sp, bn, in, sn, len);
00218 if (!tp.empty()) {
00219 pl.push_back(dp);
00220 pl.push_back(tp.size());
00221 pos_list_itr itr_end = tp.end();
00222 for (pos_list_itr itr = tp.begin();
00223 itr != itr_end; ++itr) {
00224 pl.push_back(*itr);
00225 }
00226 }
00227 dp = next(bp, ip, sp);
00228 dn = next(bn, in, sn);
00229 } else if (dp > dn) {
00230 skip(bn, in, sn, next(bn, in, sn));
00231 dn = next(bn, in, sn);
00232 } else {
00233 skip(bp, ip, sp, next(bp, ip, sp));
00234 dp = next(bp, ip, sp);
00235 }
00236
00237 if ((ip == sp && in == sn) ||
00238 (ip == sp && dn > dp) ||
00239 (in == sn && dp > dn)) {
00240 break;
00241 }
00242 }
00243 return pl;
00244 }
00245
00246
00247 static inline
00248 pos_list_t intersect(uint8_t *bp, uint32_t sp,
00249 uint8_t *bn, uint32_t sn, int len)
00250 {
00251 pos_list_t pl;
00252 if (sp == 0 || sn == 0) { return pl; }
00253
00254
00255 int ip = 0, in = 0;
00256 uint32_t prev_dp = 0, prev_dn = 0;
00257 int dp = next(bp, ip, sp, prev_dp);
00258 int dn = next(bn, in, sn, prev_dn);
00259
00260 while (1) {
00261 if (dp == dn) {
00262 pos_list_t tp = _intersect(bp, ip, sp, bn, in, sn, len);
00263 if (!tp.empty()) {
00264 pl.push_back(dp);
00265 pl.push_back(tp.size());
00266 pos_list_itr itr_end = tp.end();
00267 for (pos_list_itr itr = tp.begin();
00268 itr != itr_end; ++itr) {
00269 pl.push_back(*itr);
00270 }
00271 }
00272 dp = next(bp, ip, sp, prev_dp);
00273 dn = next(bn, in, sn, prev_dn);
00274 } else if (dp > dn) {
00275 skip(bn, in, sn, next(bn, in, sn));
00276 dn = next(bn, in, sn, prev_dn);
00277 } else {
00278 skip(bp, ip, sp, next(bp, ip, sp));
00279 dp = next(bp, ip, sp, prev_dp);
00280 }
00281
00282 if ((ip == sp && in == sn) ||
00283 (ip == sp && dn > dp) ||
00284 (in == sn && dp > dn)) {
00285 break;
00286 }
00287 }
00288 return pl;
00289 }
00290
00291
00292
00293 static inline pos_list_t
00294 _intersect_nodiff(uint8_t *bp, int &ip, int sp, pos_list_itr itr, int len)
00295 {
00296 pos_list_t pl;
00297
00298
00299 int np = next(bp, ip, sp) - 1;
00300 int nn = *(++itr) - 1;
00301 int pp = next(bp, ip, sp);
00302 int pn = *(++itr);
00303
00304 while (1) {
00305 if (pp + len == pn) {
00306 pl.push_back(pp);
00307 if (np > 0) { pp = next(bp, ip, sp); }
00308 pn = next(&itr, &nn);
00309 --np;
00310 } else if (pp + len > pn) {
00311 pn = next(&itr, &nn);
00312 } else {
00313 if (np > 0) { pp = next(bp, ip, sp); }
00314 --np;
00315 }
00316
00317 if (np < 0 && nn < 0) break;
00318 if (np < 0 && pn > pp + len) {
00319 break;
00320 }
00321 if (nn < 0 && pp + len > pn) {
00322 if (np > 0) { skip(bp, ip, sp, np); }
00323 break;
00324 }
00325 }
00326 return pl;
00327 }
00328
00329
00330 static inline pos_list_t
00331 _intersect(uint8_t *bp, int &ip, int sp, pos_list_itr itr, int len)
00332 {
00333 pos_list_t pl;
00334
00335
00336 int np = next(bp, ip, sp) - 1;
00337 int nn = *(++itr) - 1;
00338 int pp = next(bp, ip, sp);
00339 int pn = *(++itr);
00340
00341 while (1) {
00342 if (pp + len == pn) {
00343 pl.push_back(pp);
00344 if (np > 0) { pp += next(bp, ip, sp); }
00345 pn = next(&itr, &nn);
00346 --np;
00347 } else if (pp + len > pn) {
00348 pn = next(&itr, &nn);
00349 } else {
00350 if (np > 0) { pp += next(bp, ip, sp); }
00351 --np;
00352 }
00353
00354 if (np < 0 && nn < 0) break;
00355 if (np < 0 && pn > pp + len) {
00356 break;
00357 }
00358 if (nn < 0 && pp + len > pn) {
00359 if (np > 0) { skip(bp, ip, sp, np); }
00360 break;
00361 }
00362 }
00363 return pl;
00364 }
00365
00366
00367 static inline
00368 pos_list_t intersect_nodiff(uint8_t *bp, uint32_t sp, pos_list_t &pn, int len)
00369 {
00370 pos_list_t pl;
00371 if (sp == 0 || pn.size() == 0) { return pl; }
00372
00373
00374 pos_list_itr itrn = pn.begin();
00375 pos_list_itr itrn_end = pn.end();
00376
00377 int ip = 0;
00378 int dp = next(bp, ip, sp);
00379 int dn = *itrn;
00380
00381 while (1) {
00382 if (dp == dn) {
00383 pos_list_t tp = _intersect_nodiff(bp, ip , sp, itrn, len);
00384 if (!tp.empty()) {
00385 pl.push_back(dp);
00386 pl.push_back(tp.size());
00387 pos_list_itr itr_end = tp.end();
00388 for (pos_list_itr itr = tp.begin();
00389 itr != itr_end; ++itr) {
00390 pl.push_back(*itr);
00391 }
00392 }
00393 dp = next(bp, ip, sp);
00394 dn = next(&itrn, itrn_end);
00395 } else if (dp > dn) {
00396 dn = next(&itrn, itrn_end);
00397 } else {
00398 skip(bp, ip, sp, next(bp, ip, sp));
00399 dp = next(bp, ip, sp);
00400 }
00401
00402 if ((ip == sp && itrn == itrn_end) ||
00403 (ip == sp && dn > dp) ||
00404 (itrn == itrn_end && dp > dn)) {
00405 break;
00406 }
00407 }
00408 return pl;
00409 }
00410
00411
00412 static inline
00413 pos_list_t intersect(uint8_t *bp, uint32_t sp, pos_list_t &pn, int len)
00414 {
00415 pos_list_t pl;
00416 if (sp == 0 || pn.size() == 0) { return pl; }
00417
00418
00419 pos_list_itr itrn = pn.begin();
00420 pos_list_itr itrn_end = pn.end();
00421
00422 int ip = 0;
00423 uint32_t prev_dp = 0;
00424 int dp = next(bp, ip, sp, prev_dp);
00425 int dn = *itrn;
00426
00427 while (1) {
00428 if (dp == dn) {
00429 pos_list_t tp = _intersect(bp, ip , sp, itrn, len);
00430 if (!tp.empty()) {
00431 pl.push_back(dp);
00432 pl.push_back(tp.size());
00433 pos_list_itr itr_end = tp.end();
00434 for (pos_list_itr itr = tp.begin();
00435 itr != itr_end; ++itr) {
00436 pl.push_back(*itr);
00437 }
00438 }
00439 dp = next(bp, ip, sp, prev_dp);
00440 dn = next(&itrn, itrn_end);
00441 } else if (dp > dn) {
00442 dn = next(&itrn, itrn_end);
00443 } else {
00444 skip(bp, ip, sp, next(bp, ip, sp));
00445 dp = next(bp, ip, sp, prev_dp);
00446 }
00447
00448 if ((ip == sp && itrn == itrn_end) ||
00449 (ip == sp && dn > dp) ||
00450 (itrn == itrn_end && dp > dn)) {
00451 break;
00452 }
00453 }
00454 return pl;
00455 }
00456
00457
00458 static inline pos_list_t
00459 _intersect_nodiff(pos_list_itr itr, uint8_t *bn, int &in, int sn, int len)
00460 {
00461 pos_list_t pl;
00462
00463
00464 int np = *(++itr) - 1;
00465 int nn = next(bn, in, sn) - 1;
00466 int pp = *(++itr);
00467 int pn = next(bn, in, sn);
00468
00469 while (1) {
00470 if (pp + len == pn) {
00471 pl.push_back(pp);
00472 pp = next(&itr, &np);
00473 if (nn > 0) { pn = next(bn, in, sn); }
00474 --nn;
00475 } else if (pp + len > pn) {
00476 if (nn > 0) { pn = next(bn, in, sn); }
00477 --nn;
00478 } else {
00479 pp = next(&itr, &np);
00480 }
00481
00482 if (np < 0 && nn < 0) break;
00483 if (np < 0 && pn > pp + len) {
00484 if (nn > 0) { skip(bn, in, sn, nn); }
00485 break;
00486 }
00487 if (nn < 0 && pp + len > pn) {
00488 break;
00489 }
00490 }
00491 return pl;
00492 }
00493
00494
00495 static inline pos_list_t
00496 _intersect(pos_list_itr itr, uint8_t *bn, int &in, int sn, int len)
00497 {
00498 pos_list_t pl;
00499
00500
00501 int np = *(++itr) - 1;
00502 int nn = next(bn, in, sn) - 1;
00503 int pp = *(++itr);
00504 int pn = next(bn, in, sn);
00505
00506 while (1) {
00507 if (pp + len == pn) {
00508 pl.push_back(pp);
00509 pp = next(&itr, &np);
00510 if (nn > 0) { pn += next(bn, in, sn); }
00511 --nn;
00512 } else if (pp + len > pn) {
00513 if (nn > 0) { pn += next(bn, in, sn); }
00514 --nn;
00515 } else {
00516 pp = next(&itr, &np);
00517 }
00518
00519 if (np < 0 && nn < 0) break;
00520 if (np < 0 && pn > pp + len) {
00521 if (nn > 0) { skip(bn, in, sn, nn); }
00522 break;
00523 }
00524 if (nn < 0 && pp + len > pn) {
00525 break;
00526 }
00527 }
00528 return pl;
00529 }
00530
00531
00532 static inline
00533 pos_list_t intersect_nodiff(pos_list_t &pp, uint8_t *bn, uint32_t sn, int len)
00534 {
00535 pos_list_t pl;
00536 if (pp.size() == 0 || sn == 0) { return pl; }
00537
00538
00539 pos_list_itr itrp = pp.begin();
00540 pos_list_itr itrp_end = pp.end();
00541
00542 int in = 0;
00543 int dp = *itrp;
00544 int dn = next(bn, in, sn);
00545
00546 while (1) {
00547 if (dp == dn) {
00548 pos_list_t tp = _intersect_nodiff(itrp, bn, in , sn, len);
00549 if (!tp.empty()) {
00550 pl.push_back(dp);
00551 pl.push_back(tp.size());
00552 pos_list_itr itr_end = tp.end();
00553 for (pos_list_itr itr = tp.begin();
00554 itr != itr_end; ++itr) {
00555 pl.push_back(*itr);
00556 }
00557 }
00558 dp = next(&itrp, itrp_end);
00559 dn = next(bn, in, sn);
00560 } else if (dp > dn) {
00561 skip(bn, in, sn, next(bn, in, sn));
00562 dn = next(bn, in, sn);
00563 } else {
00564 dp = next(&itrp, itrp_end);
00565 }
00566
00567 if ((itrp == itrp_end && in == sn) ||
00568 (itrp == itrp_end && dn > dp) ||
00569 (in == sn && dp > dn)) {
00570 break;
00571 }
00572 }
00573 return pl;
00574 }
00575
00576
00577 static inline
00578 pos_list_t intersect(pos_list_t &pp, uint8_t *bn, uint32_t sn, int len)
00579 {
00580 pos_list_t pl;
00581 if (pp.size() == 0 || sn == 0) { return pl; }
00582
00583
00584 pos_list_itr itrp = pp.begin();
00585 pos_list_itr itrp_end = pp.end();
00586
00587 int in = 0;
00588 int dp = *itrp;
00589 uint32_t prev_dn = 0;
00590 int dn = next(bn, in, sn, prev_dn);
00591
00592 while (1) {
00593 if (dp == dn) {
00594 pos_list_t tp = _intersect(itrp, bn, in , sn, len);
00595 if (!tp.empty()) {
00596 pl.push_back(dp);
00597 pl.push_back(tp.size());
00598 pos_list_itr itr_end = tp.end();
00599 for (pos_list_itr itr = tp.begin();
00600 itr != itr_end; ++itr) {
00601 pl.push_back(*itr);
00602 }
00603 }
00604 dp = next(&itrp, itrp_end);
00605 dn = next(bn, in, sn, prev_dn);
00606 } else if (dp > dn) {
00607 skip(bn, in, sn, next(bn, in, sn));
00608 dn = next(bn, in, sn, prev_dn);
00609 } else {
00610 dp = next(&itrp, itrp_end);
00611 }
00612
00613 if ((itrp == itrp_end && in == sn) ||
00614 (itrp == itrp_end && dn > dp) ||
00615 (in == sn && dp > dn)) {
00616 break;
00617 }
00618 }
00619 return pl;
00620 }
00621
00622 }
00623
00624 #endif