00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regparse.h"
00031
00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00033
00034 extern OnigCaseFoldType
00035 onig_get_default_case_fold_flag(void)
00036 {
00037 return OnigDefaultCaseFoldFlag;
00038 }
00039
00040 extern int
00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00042 {
00043 OnigDefaultCaseFoldFlag = case_fold_flag;
00044 return 0;
00045 }
00046
00047 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00048 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00049 #endif
00050
00051 static UChar*
00052 str_dup(UChar* s, UChar* end)
00053 {
00054 ptrdiff_t len = end - s;
00055
00056 if (len > 0) {
00057 UChar* r = (UChar* )xmalloc(len + 1);
00058 CHECK_NULL_RETURN(r);
00059 xmemcpy(r, s, len);
00060 r[len] = (UChar )0;
00061 return r;
00062 }
00063 else return NULL;
00064 }
00065
00066 static void
00067 swap_node(Node* a, Node* b)
00068 {
00069 Node c;
00070 c = *a; *a = *b; *b = c;
00071
00072 if (NTYPE(a) == NT_STR) {
00073 StrNode* sn = NSTR(a);
00074 if (sn->capa == 0) {
00075 size_t len = sn->end - sn->s;
00076 sn->s = sn->buf;
00077 sn->end = sn->s + len;
00078 }
00079 }
00080
00081 if (NTYPE(b) == NT_STR) {
00082 StrNode* sn = NSTR(b);
00083 if (sn->capa == 0) {
00084 size_t len = sn->end - sn->s;
00085 sn->s = sn->buf;
00086 sn->end = sn->s + len;
00087 }
00088 }
00089 }
00090
00091 static OnigDistance
00092 distance_add(OnigDistance d1, OnigDistance d2)
00093 {
00094 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00095 return ONIG_INFINITE_DISTANCE;
00096 else {
00097 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00098 else return ONIG_INFINITE_DISTANCE;
00099 }
00100 }
00101
00102 static OnigDistance
00103 distance_multiply(OnigDistance d, int m)
00104 {
00105 if (m == 0) return 0;
00106
00107 if (d < ONIG_INFINITE_DISTANCE / m)
00108 return d * m;
00109 else
00110 return ONIG_INFINITE_DISTANCE;
00111 }
00112
00113 static int
00114 bitset_is_empty(BitSetRef bs)
00115 {
00116 int i;
00117 for (i = 0; i < (int )BITSET_SIZE; i++) {
00118 if (bs[i] != 0) return 0;
00119 }
00120 return 1;
00121 }
00122
00123 #ifdef ONIG_DEBUG
00124 static int
00125 onig_is_prelude(void)
00126 {
00127 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00128 }
00129
00130 static int
00131 bitset_on_num(BitSetRef bs)
00132 {
00133 int i, n;
00134
00135 n = 0;
00136 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00137 if (BITSET_AT(bs, i)) n++;
00138 }
00139 return n;
00140 }
00141 #endif
00142
00143 extern int
00144 onig_bbuf_init(BBuf* buf, OnigDistance size)
00145 {
00146 if (size <= 0) {
00147 size = 0;
00148 buf->p = NULL;
00149 }
00150 else {
00151 buf->p = (UChar* )xmalloc(size);
00152 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00153 }
00154
00155 buf->alloc = (unsigned int)size;
00156 buf->used = 0;
00157 return 0;
00158 }
00159
00160
00161 #ifdef USE_SUBEXP_CALL
00162
00163 static int
00164 unset_addr_list_init(UnsetAddrList* uslist, int size)
00165 {
00166 UnsetAddr* p;
00167
00168 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00169 CHECK_NULL_RETURN_MEMERR(p);
00170 uslist->num = 0;
00171 uslist->alloc = size;
00172 uslist->us = p;
00173 return 0;
00174 }
00175
00176 static void
00177 unset_addr_list_end(UnsetAddrList* uslist)
00178 {
00179 if (IS_NOT_NULL(uslist->us))
00180 xfree(uslist->us);
00181 }
00182
00183 static int
00184 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00185 {
00186 UnsetAddr* p;
00187 int size;
00188
00189 if (uslist->num >= uslist->alloc) {
00190 size = uslist->alloc * 2;
00191 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00192 CHECK_NULL_RETURN_MEMERR(p);
00193 uslist->alloc = size;
00194 uslist->us = p;
00195 }
00196
00197 uslist->us[uslist->num].offset = offset;
00198 uslist->us[uslist->num].target = node;
00199 uslist->num++;
00200 return 0;
00201 }
00202 #endif
00203
00204
00205 static int
00206 add_opcode(regex_t* reg, int opcode)
00207 {
00208 BBUF_ADD1(reg, opcode);
00209 return 0;
00210 }
00211
00212 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00213 static int
00214 add_state_check_num(regex_t* reg, int num)
00215 {
00216 StateCheckNumType n = (StateCheckNumType )num;
00217
00218 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00219 return 0;
00220 }
00221 #endif
00222
00223 static int
00224 add_rel_addr(regex_t* reg, int addr)
00225 {
00226 RelAddrType ra = (RelAddrType )addr;
00227
00228 BBUF_ADD(reg, &ra, SIZE_RELADDR);
00229 return 0;
00230 }
00231
00232 static int
00233 add_abs_addr(regex_t* reg, int addr)
00234 {
00235 AbsAddrType ra = (AbsAddrType )addr;
00236
00237 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00238 return 0;
00239 }
00240
00241 static int
00242 add_length(regex_t* reg, OnigDistance len)
00243 {
00244 LengthType l = (LengthType )len;
00245
00246 BBUF_ADD(reg, &l, SIZE_LENGTH);
00247 return 0;
00248 }
00249
00250 static int
00251 add_mem_num(regex_t* reg, int num)
00252 {
00253 MemNumType n = (MemNumType )num;
00254
00255 BBUF_ADD(reg, &n, SIZE_MEMNUM);
00256 return 0;
00257 }
00258
00259 static int
00260 add_pointer(regex_t* reg, void* addr)
00261 {
00262 PointerType ptr = (PointerType )addr;
00263
00264 BBUF_ADD(reg, &ptr, SIZE_POINTER);
00265 return 0;
00266 }
00267
00268 static int
00269 add_option(regex_t* reg, OnigOptionType option)
00270 {
00271 BBUF_ADD(reg, &option, SIZE_OPTION);
00272 return 0;
00273 }
00274
00275 static int
00276 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00277 {
00278 int r;
00279
00280 r = add_opcode(reg, opcode);
00281 if (r) return r;
00282 r = add_rel_addr(reg, addr);
00283 return r;
00284 }
00285
00286 static int
00287 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00288 {
00289 BBUF_ADD(reg, bytes, len);
00290 return 0;
00291 }
00292
00293 static int
00294 add_bitset(regex_t* reg, BitSetRef bs)
00295 {
00296 BBUF_ADD(reg, bs, SIZE_BITSET);
00297 return 0;
00298 }
00299
00300 static int
00301 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00302 {
00303 int r;
00304
00305 r = add_opcode(reg, opcode);
00306 if (r) return r;
00307 r = add_option(reg, option);
00308 return r;
00309 }
00310
00311 static int compile_length_tree(Node* node, regex_t* reg);
00312 static int compile_tree(Node* node, regex_t* reg);
00313
00314
00315 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
00317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
00318
00319 static int
00320 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00321 {
00322 int op;
00323
00324 if (ignore_case) {
00325 switch (str_len) {
00326 case 1: op = OP_EXACT1_IC; break;
00327 default: op = OP_EXACTN_IC; break;
00328 }
00329 }
00330 else {
00331 switch (mb_len) {
00332 case 1:
00333 switch (str_len) {
00334 case 1: op = OP_EXACT1; break;
00335 case 2: op = OP_EXACT2; break;
00336 case 3: op = OP_EXACT3; break;
00337 case 4: op = OP_EXACT4; break;
00338 case 5: op = OP_EXACT5; break;
00339 default: op = OP_EXACTN; break;
00340 }
00341 break;
00342
00343 case 2:
00344 switch (str_len) {
00345 case 1: op = OP_EXACTMB2N1; break;
00346 case 2: op = OP_EXACTMB2N2; break;
00347 case 3: op = OP_EXACTMB2N3; break;
00348 default: op = OP_EXACTMB2N; break;
00349 }
00350 break;
00351
00352 case 3:
00353 op = OP_EXACTMB3N;
00354 break;
00355
00356 default:
00357 op = OP_EXACTMBN;
00358 break;
00359 }
00360 }
00361 return op;
00362 }
00363
00364 static int
00365 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00366 {
00367 int r;
00368 int saved_num_null_check = reg->num_null_check;
00369
00370 if (empty_info != 0) {
00371 r = add_opcode(reg, OP_NULL_CHECK_START);
00372 if (r) return r;
00373 r = add_mem_num(reg, reg->num_null_check);
00374 if (r) return r;
00375 reg->num_null_check++;
00376 }
00377
00378 r = compile_tree(node, reg);
00379 if (r) return r;
00380
00381 if (empty_info != 0) {
00382 if (empty_info == NQ_TARGET_IS_EMPTY)
00383 r = add_opcode(reg, OP_NULL_CHECK_END);
00384 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00385 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00386 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00387 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00388
00389 if (r) return r;
00390 r = add_mem_num(reg, saved_num_null_check);
00391 }
00392 return r;
00393 }
00394
00395 #ifdef USE_SUBEXP_CALL
00396 static int
00397 compile_call(CallNode* node, regex_t* reg)
00398 {
00399 int r;
00400
00401 r = add_opcode(reg, OP_CALL);
00402 if (r) return r;
00403 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00404 node->target);
00405 if (r) return r;
00406 r = add_abs_addr(reg, 0 );
00407 return r;
00408 }
00409 #endif
00410
00411 static int
00412 compile_tree_n_times(Node* node, int n, regex_t* reg)
00413 {
00414 int i, r;
00415
00416 for (i = 0; i < n; i++) {
00417 r = compile_tree(node, reg);
00418 if (r) return r;
00419 }
00420 return 0;
00421 }
00422
00423 static int
00424 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00425 regex_t* reg ARG_UNUSED, int ignore_case)
00426 {
00427 int len;
00428 int op = select_str_opcode(mb_len, str_len, ignore_case);
00429
00430 len = SIZE_OPCODE;
00431
00432 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
00433 if (IS_NEED_STR_LEN_OP_EXACT(op))
00434 len += SIZE_LENGTH;
00435
00436 len += mb_len * str_len;
00437 return len;
00438 }
00439
00440 static int
00441 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00442 regex_t* reg, int ignore_case)
00443 {
00444 int op = select_str_opcode(mb_len, str_len, ignore_case);
00445 add_opcode(reg, op);
00446
00447 if (op == OP_EXACTMBN)
00448 add_length(reg, mb_len);
00449
00450 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00451 if (op == OP_EXACTN_IC)
00452 add_length(reg, mb_len * str_len);
00453 else
00454 add_length(reg, str_len);
00455 }
00456
00457 add_bytes(reg, s, mb_len * str_len);
00458 return 0;
00459 }
00460
00461
00462 static int
00463 compile_length_string_node(Node* node, regex_t* reg)
00464 {
00465 int rlen, r, len, prev_len, slen, ambig;
00466 OnigEncoding enc = reg->enc;
00467 UChar *p, *prev;
00468 StrNode* sn;
00469
00470 sn = NSTR(node);
00471 if (sn->end <= sn->s)
00472 return 0;
00473
00474 ambig = NSTRING_IS_AMBIG(node);
00475
00476 p = prev = sn->s;
00477 prev_len = enclen(enc, p, sn->end);
00478 p += prev_len;
00479 slen = 1;
00480 rlen = 0;
00481
00482 for (; p < sn->end; ) {
00483 len = enclen(enc, p, sn->end);
00484 if (len == prev_len) {
00485 slen++;
00486 }
00487 else {
00488 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00489 rlen += r;
00490 prev = p;
00491 slen = 1;
00492 prev_len = len;
00493 }
00494 p += len;
00495 }
00496 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00497 rlen += r;
00498 return rlen;
00499 }
00500
00501 static int
00502 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00503 {
00504 if (sn->end <= sn->s)
00505 return 0;
00506
00507 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
00508 }
00509
00510 static int
00511 compile_string_node(Node* node, regex_t* reg)
00512 {
00513 int r, len, prev_len, slen, ambig;
00514 OnigEncoding enc = reg->enc;
00515 UChar *p, *prev, *end;
00516 StrNode* sn;
00517
00518 sn = NSTR(node);
00519 if (sn->end <= sn->s)
00520 return 0;
00521
00522 end = sn->end;
00523 ambig = NSTRING_IS_AMBIG(node);
00524
00525 p = prev = sn->s;
00526 prev_len = enclen(enc, p, end);
00527 p += prev_len;
00528 slen = 1;
00529
00530 for (; p < end; ) {
00531 len = enclen(enc, p, end);
00532 if (len == prev_len) {
00533 slen++;
00534 }
00535 else {
00536 r = add_compile_string(prev, prev_len, slen, reg, ambig);
00537 if (r) return r;
00538
00539 prev = p;
00540 slen = 1;
00541 prev_len = len;
00542 }
00543
00544 p += len;
00545 }
00546 return add_compile_string(prev, prev_len, slen, reg, ambig);
00547 }
00548
00549 static int
00550 compile_string_raw_node(StrNode* sn, regex_t* reg)
00551 {
00552 if (sn->end <= sn->s)
00553 return 0;
00554
00555 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
00556 }
00557
00558 static int
00559 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00560 {
00561 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00562 add_length(reg, mbuf->used);
00563 return add_bytes(reg, mbuf->p, mbuf->used);
00564 #else
00565 int r, pad_size;
00566 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00567
00568 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00569 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00570 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00571
00572 r = add_bytes(reg, mbuf->p, mbuf->used);
00573
00574
00575 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00576 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00577 return r;
00578 #endif
00579 }
00580
00581 static int
00582 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00583 {
00584 int len;
00585
00586 if (IS_NCCLASS_SHARE(cc)) {
00587 len = SIZE_OPCODE + SIZE_POINTER;
00588 return len;
00589 }
00590
00591 if (IS_NULL(cc->mbuf)) {
00592 len = SIZE_OPCODE + SIZE_BITSET;
00593 }
00594 else {
00595 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00596 len = SIZE_OPCODE;
00597 }
00598 else {
00599 len = SIZE_OPCODE + SIZE_BITSET;
00600 }
00601 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00602 len += SIZE_LENGTH + cc->mbuf->used;
00603 #else
00604 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00605 #endif
00606 }
00607
00608 return len;
00609 }
00610
00611 static int
00612 compile_cclass_node(CClassNode* cc, regex_t* reg)
00613 {
00614 int r;
00615
00616 if (IS_NCCLASS_SHARE(cc)) {
00617 add_opcode(reg, OP_CCLASS_NODE);
00618 r = add_pointer(reg, cc);
00619 return r;
00620 }
00621
00622 if (IS_NULL(cc->mbuf)) {
00623 if (IS_NCCLASS_NOT(cc))
00624 add_opcode(reg, OP_CCLASS_NOT);
00625 else
00626 add_opcode(reg, OP_CCLASS);
00627
00628 r = add_bitset(reg, cc->bs);
00629 }
00630 else {
00631 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00632 if (IS_NCCLASS_NOT(cc))
00633 add_opcode(reg, OP_CCLASS_MB_NOT);
00634 else
00635 add_opcode(reg, OP_CCLASS_MB);
00636
00637 r = add_multi_byte_cclass(cc->mbuf, reg);
00638 }
00639 else {
00640 if (IS_NCCLASS_NOT(cc))
00641 add_opcode(reg, OP_CCLASS_MIX_NOT);
00642 else
00643 add_opcode(reg, OP_CCLASS_MIX);
00644
00645 r = add_bitset(reg, cc->bs);
00646 if (r) return r;
00647 r = add_multi_byte_cclass(cc->mbuf, reg);
00648 }
00649 }
00650
00651 return r;
00652 }
00653
00654 static int
00655 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00656 {
00657 #define REPEAT_RANGE_ALLOC 4
00658
00659 OnigRepeatRange* p;
00660
00661 if (reg->repeat_range_alloc == 0) {
00662 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00663 CHECK_NULL_RETURN_MEMERR(p);
00664 reg->repeat_range = p;
00665 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00666 }
00667 else if (reg->repeat_range_alloc <= id) {
00668 int n;
00669 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00670 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00671 sizeof(OnigRepeatRange) * n);
00672 CHECK_NULL_RETURN_MEMERR(p);
00673 reg->repeat_range = p;
00674 reg->repeat_range_alloc = n;
00675 }
00676 else {
00677 p = reg->repeat_range;
00678 }
00679
00680 p[id].lower = lower;
00681 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00682 return 0;
00683 }
00684
00685 static int
00686 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00687 regex_t* reg)
00688 {
00689 int r;
00690 int num_repeat = reg->num_repeat;
00691
00692 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00693 if (r) return r;
00694 r = add_mem_num(reg, num_repeat);
00695 reg->num_repeat++;
00696 if (r) return r;
00697 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00698 if (r) return r;
00699
00700 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00701 if (r) return r;
00702
00703 r = compile_tree_empty_check(qn->target, reg, empty_info);
00704 if (r) return r;
00705
00706 if (
00707 #ifdef USE_SUBEXP_CALL
00708 reg->num_call > 0 ||
00709 #endif
00710 IS_QUANTIFIER_IN_REPEAT(qn)) {
00711 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00712 }
00713 else {
00714 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00715 }
00716 if (r) return r;
00717 r = add_mem_num(reg, num_repeat);
00718 return r;
00719 }
00720
00721 static int
00722 is_anychar_star_quantifier(QtfrNode* qn)
00723 {
00724 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00725 NTYPE(qn->target) == NT_CANY)
00726 return 1;
00727 else
00728 return 0;
00729 }
00730
00731 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
00732 #define CKN_ON (ckn > 0)
00733
00734 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00735
00736 static int
00737 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00738 {
00739 int len, mod_tlen, cklen;
00740 int ckn;
00741 int infinite = IS_REPEAT_INFINITE(qn->upper);
00742 int empty_info = qn->target_empty_info;
00743 int tlen = compile_length_tree(qn->target, reg);
00744
00745 if (tlen < 0) return tlen;
00746
00747 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00748
00749 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00750
00751
00752 if (NTYPE(qn->target) == NT_CANY) {
00753 if (qn->greedy && infinite) {
00754 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00755 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00756 else
00757 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00758 }
00759 }
00760
00761 if (empty_info != 0)
00762 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00763 else
00764 mod_tlen = tlen;
00765
00766 if (infinite && qn->lower <= 1) {
00767 if (qn->greedy) {
00768 if (qn->lower == 1)
00769 len = SIZE_OP_JUMP;
00770 else
00771 len = 0;
00772
00773 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00774 }
00775 else {
00776 if (qn->lower == 0)
00777 len = SIZE_OP_JUMP;
00778 else
00779 len = 0;
00780
00781 len += mod_tlen + SIZE_OP_PUSH + cklen;
00782 }
00783 }
00784 else if (qn->upper == 0) {
00785 if (qn->is_refered != 0)
00786 len = SIZE_OP_JUMP + tlen;
00787 else
00788 len = 0;
00789 }
00790 else if (qn->upper == 1 && qn->greedy) {
00791 if (qn->lower == 0) {
00792 if (CKN_ON) {
00793 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00794 }
00795 else {
00796 len = SIZE_OP_PUSH + tlen;
00797 }
00798 }
00799 else {
00800 len = tlen;
00801 }
00802 }
00803 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00804 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00805 }
00806 else {
00807 len = SIZE_OP_REPEAT_INC
00808 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00809 if (CKN_ON)
00810 len += SIZE_OP_STATE_CHECK;
00811 }
00812
00813 return len;
00814 }
00815
00816 static int
00817 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00818 {
00819 int r, mod_tlen;
00820 int ckn;
00821 int infinite = IS_REPEAT_INFINITE(qn->upper);
00822 int empty_info = qn->target_empty_info;
00823 int tlen = compile_length_tree(qn->target, reg);
00824
00825 if (tlen < 0) return tlen;
00826
00827 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00828
00829 if (is_anychar_star_quantifier(qn)) {
00830 r = compile_tree_n_times(qn->target, qn->lower, reg);
00831 if (r) return r;
00832 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00833 if (IS_MULTILINE(reg->options))
00834 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00835 else
00836 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00837 if (r) return r;
00838 if (CKN_ON) {
00839 r = add_state_check_num(reg, ckn);
00840 if (r) return r;
00841 }
00842
00843 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00844 }
00845 else {
00846 if (IS_MULTILINE(reg->options)) {
00847 r = add_opcode(reg, (CKN_ON ?
00848 OP_STATE_CHECK_ANYCHAR_ML_STAR
00849 : OP_ANYCHAR_ML_STAR));
00850 }
00851 else {
00852 r = add_opcode(reg, (CKN_ON ?
00853 OP_STATE_CHECK_ANYCHAR_STAR
00854 : OP_ANYCHAR_STAR));
00855 }
00856 if (r) return r;
00857 if (CKN_ON)
00858 r = add_state_check_num(reg, ckn);
00859
00860 return r;
00861 }
00862 }
00863
00864 if (empty_info != 0)
00865 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00866 else
00867 mod_tlen = tlen;
00868
00869 if (infinite && qn->lower <= 1) {
00870 if (qn->greedy) {
00871 if (qn->lower == 1) {
00872 r = add_opcode_rel_addr(reg, OP_JUMP,
00873 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00874 if (r) return r;
00875 }
00876
00877 if (CKN_ON) {
00878 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00879 if (r) return r;
00880 r = add_state_check_num(reg, ckn);
00881 if (r) return r;
00882 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00883 }
00884 else {
00885 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00886 }
00887 if (r) return r;
00888 r = compile_tree_empty_check(qn->target, reg, empty_info);
00889 if (r) return r;
00890 r = add_opcode_rel_addr(reg, OP_JUMP,
00891 -(mod_tlen + (int )SIZE_OP_JUMP
00892 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00893 }
00894 else {
00895 if (qn->lower == 0) {
00896 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00897 if (r) return r;
00898 }
00899 r = compile_tree_empty_check(qn->target, reg, empty_info);
00900 if (r) return r;
00901 if (CKN_ON) {
00902 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00903 if (r) return r;
00904 r = add_state_check_num(reg, ckn);
00905 if (r) return r;
00906 r = add_rel_addr(reg,
00907 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00908 }
00909 else
00910 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00911 }
00912 }
00913 else if (qn->upper == 0) {
00914 if (qn->is_refered != 0) {
00915 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00916 if (r) return r;
00917 r = compile_tree(qn->target, reg);
00918 }
00919 else
00920 r = 0;
00921 }
00922 else if (qn->upper == 1 && qn->greedy) {
00923 if (qn->lower == 0) {
00924 if (CKN_ON) {
00925 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00926 if (r) return r;
00927 r = add_state_check_num(reg, ckn);
00928 if (r) return r;
00929 r = add_rel_addr(reg, tlen);
00930 }
00931 else {
00932 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00933 }
00934 if (r) return r;
00935 }
00936
00937 r = compile_tree(qn->target, reg);
00938 }
00939 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00940 if (CKN_ON) {
00941 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00942 if (r) return r;
00943 r = add_state_check_num(reg, ckn);
00944 if (r) return r;
00945 r = add_rel_addr(reg, SIZE_OP_JUMP);
00946 }
00947 else {
00948 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00949 }
00950
00951 if (r) return r;
00952 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00953 if (r) return r;
00954 r = compile_tree(qn->target, reg);
00955 }
00956 else {
00957 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00958 if (CKN_ON) {
00959 if (r) return r;
00960 r = add_opcode(reg, OP_STATE_CHECK);
00961 if (r) return r;
00962 r = add_state_check_num(reg, ckn);
00963 }
00964 }
00965 return r;
00966 }
00967
00968 #else
00969
00970 static int
00971 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00972 {
00973 int len, mod_tlen;
00974 int infinite = IS_REPEAT_INFINITE(qn->upper);
00975 int empty_info = qn->target_empty_info;
00976 int tlen = compile_length_tree(qn->target, reg);
00977
00978 if (tlen < 0) return tlen;
00979
00980
00981 if (NTYPE(qn->target) == NT_CANY) {
00982 if (qn->greedy && infinite) {
00983 if (IS_NOT_NULL(qn->next_head_exact))
00984 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00985 else
00986 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00987 }
00988 }
00989
00990 if (empty_info != 0)
00991 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00992 else
00993 mod_tlen = tlen;
00994
00995 if (infinite &&
00996 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00997 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00998 len = SIZE_OP_JUMP;
00999 }
01000 else {
01001 len = tlen * qn->lower;
01002 }
01003
01004 if (qn->greedy) {
01005 if (IS_NOT_NULL(qn->head_exact))
01006 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01007 else if (IS_NOT_NULL(qn->next_head_exact))
01008 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01009 else
01010 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01011 }
01012 else
01013 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01014 }
01015 else if (qn->upper == 0 && qn->is_refered != 0) {
01016 len = SIZE_OP_JUMP + tlen;
01017 }
01018 else if (!infinite && qn->greedy &&
01019 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01020 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01021 len = tlen * qn->lower;
01022 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01023 }
01024 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01025 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01026 }
01027 else {
01028 len = SIZE_OP_REPEAT_INC
01029 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01030 }
01031
01032 return len;
01033 }
01034
01035 static int
01036 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01037 {
01038 int i, r, mod_tlen;
01039 int infinite = IS_REPEAT_INFINITE(qn->upper);
01040 int empty_info = qn->target_empty_info;
01041 int tlen = compile_length_tree(qn->target, reg);
01042
01043 if (tlen < 0) return tlen;
01044
01045 if (is_anychar_star_quantifier(qn)) {
01046 r = compile_tree_n_times(qn->target, qn->lower, reg);
01047 if (r) return r;
01048 if (IS_NOT_NULL(qn->next_head_exact)) {
01049 if (IS_MULTILINE(reg->options))
01050 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01051 else
01052 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01053 if (r) return r;
01054 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01055 }
01056 else {
01057 if (IS_MULTILINE(reg->options))
01058 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01059 else
01060 return add_opcode(reg, OP_ANYCHAR_STAR);
01061 }
01062 }
01063
01064 if (empty_info != 0)
01065 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01066 else
01067 mod_tlen = tlen;
01068
01069 if (infinite &&
01070 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01071 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01072 if (qn->greedy) {
01073 if (IS_NOT_NULL(qn->head_exact))
01074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01075 else if (IS_NOT_NULL(qn->next_head_exact))
01076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01077 else
01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01079 }
01080 else {
01081 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01082 }
01083 if (r) return r;
01084 }
01085 else {
01086 r = compile_tree_n_times(qn->target, qn->lower, reg);
01087 if (r) return r;
01088 }
01089
01090 if (qn->greedy) {
01091 if (IS_NOT_NULL(qn->head_exact)) {
01092 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01093 mod_tlen + SIZE_OP_JUMP);
01094 if (r) return r;
01095 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01096 r = compile_tree_empty_check(qn->target, reg, empty_info);
01097 if (r) return r;
01098 r = add_opcode_rel_addr(reg, OP_JUMP,
01099 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01100 }
01101 else if (IS_NOT_NULL(qn->next_head_exact)) {
01102 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01103 mod_tlen + SIZE_OP_JUMP);
01104 if (r) return r;
01105 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01106 r = compile_tree_empty_check(qn->target, reg, empty_info);
01107 if (r) return r;
01108 r = add_opcode_rel_addr(reg, OP_JUMP,
01109 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01110 }
01111 else {
01112 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01113 if (r) return r;
01114 r = compile_tree_empty_check(qn->target, reg, empty_info);
01115 if (r) return r;
01116 r = add_opcode_rel_addr(reg, OP_JUMP,
01117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01118 }
01119 }
01120 else {
01121 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01122 if (r) return r;
01123 r = compile_tree_empty_check(qn->target, reg, empty_info);
01124 if (r) return r;
01125 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01126 }
01127 }
01128 else if (qn->upper == 0 && qn->is_refered != 0) {
01129 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01130 if (r) return r;
01131 r = compile_tree(qn->target, reg);
01132 }
01133 else if (!infinite && qn->greedy &&
01134 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01135 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01136 int n = qn->upper - qn->lower;
01137
01138 r = compile_tree_n_times(qn->target, qn->lower, reg);
01139 if (r) return r;
01140
01141 for (i = 0; i < n; i++) {
01142 r = add_opcode_rel_addr(reg, OP_PUSH,
01143 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01144 if (r) return r;
01145 r = compile_tree(qn->target, reg);
01146 if (r) return r;
01147 }
01148 }
01149 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01150 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01151 if (r) return r;
01152 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01153 if (r) return r;
01154 r = compile_tree(qn->target, reg);
01155 }
01156 else {
01157 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01158 }
01159 return r;
01160 }
01161 #endif
01162
01163 static int
01164 compile_length_option_node(EncloseNode* node, regex_t* reg)
01165 {
01166 int tlen;
01167 OnigOptionType prev = reg->options;
01168
01169 reg->options = node->option;
01170 tlen = compile_length_tree(node->target, reg);
01171 reg->options = prev;
01172
01173 if (tlen < 0) return tlen;
01174
01175 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01176 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01177 + tlen + SIZE_OP_SET_OPTION;
01178 }
01179 else
01180 return tlen;
01181 }
01182
01183 static int
01184 compile_option_node(EncloseNode* node, regex_t* reg)
01185 {
01186 int r;
01187 OnigOptionType prev = reg->options;
01188
01189 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01190 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01191 if (r) return r;
01192 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01193 if (r) return r;
01194 r = add_opcode(reg, OP_FAIL);
01195 if (r) return r;
01196 }
01197
01198 reg->options = node->option;
01199 r = compile_tree(node->target, reg);
01200 reg->options = prev;
01201
01202 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01203 if (r) return r;
01204 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01205 }
01206 return r;
01207 }
01208
01209 static int
01210 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01211 {
01212 int len;
01213 int tlen;
01214
01215 if (node->type == ENCLOSE_OPTION)
01216 return compile_length_option_node(node, reg);
01217
01218 if (node->target) {
01219 tlen = compile_length_tree(node->target, reg);
01220 if (tlen < 0) return tlen;
01221 }
01222 else
01223 tlen = 0;
01224
01225 switch (node->type) {
01226 case ENCLOSE_MEMORY:
01227 #ifdef USE_SUBEXP_CALL
01228 if (IS_ENCLOSE_CALLED(node)) {
01229 len = SIZE_OP_MEMORY_START_PUSH + tlen
01230 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01231 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01232 len += (IS_ENCLOSE_RECURSION(node)
01233 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01234 else
01235 len += (IS_ENCLOSE_RECURSION(node)
01236 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01237 }
01238 else
01239 #endif
01240 {
01241 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01242 len = SIZE_OP_MEMORY_START_PUSH;
01243 else
01244 len = SIZE_OP_MEMORY_START;
01245
01246 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01247 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01248 }
01249 break;
01250
01251 case ENCLOSE_STOP_BACKTRACK:
01252 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01253 QtfrNode* qn = NQTFR(node->target);
01254 tlen = compile_length_tree(qn->target, reg);
01255 if (tlen < 0) return tlen;
01256
01257 len = tlen * qn->lower
01258 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01259 }
01260 else {
01261 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01262 }
01263 break;
01264
01265 default:
01266 return ONIGERR_TYPE_BUG;
01267 break;
01268 }
01269
01270 return len;
01271 }
01272
01273 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01274
01275 static int
01276 compile_enclose_node(EncloseNode* node, regex_t* reg)
01277 {
01278 int r, len;
01279
01280 if (node->type == ENCLOSE_OPTION)
01281 return compile_option_node(node, reg);
01282
01283 switch (node->type) {
01284 case ENCLOSE_MEMORY:
01285 #ifdef USE_SUBEXP_CALL
01286 if (IS_ENCLOSE_CALLED(node)) {
01287 r = add_opcode(reg, OP_CALL);
01288 if (r) return r;
01289 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01290 node->state |= NST_ADDR_FIXED;
01291 r = add_abs_addr(reg, (int )node->call_addr);
01292 if (r) return r;
01293 len = compile_length_tree(node->target, reg);
01294 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01295 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01296 len += (IS_ENCLOSE_RECURSION(node)
01297 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01298 else
01299 len += (IS_ENCLOSE_RECURSION(node)
01300 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01301
01302 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01303 if (r) return r;
01304 }
01305 #endif
01306 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01307 r = add_opcode(reg, OP_MEMORY_START_PUSH);
01308 else
01309 r = add_opcode(reg, OP_MEMORY_START);
01310 if (r) return r;
01311 r = add_mem_num(reg, node->regnum);
01312 if (r) return r;
01313 r = compile_tree(node->target, reg);
01314 if (r) return r;
01315 #ifdef USE_SUBEXP_CALL
01316 if (IS_ENCLOSE_CALLED(node)) {
01317 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01318 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01319 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01320 else
01321 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01322 ? OP_MEMORY_END_REC : OP_MEMORY_END));
01323
01324 if (r) return r;
01325 r = add_mem_num(reg, node->regnum);
01326 if (r) return r;
01327 r = add_opcode(reg, OP_RETURN);
01328 }
01329 else
01330 #endif
01331 {
01332 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01333 r = add_opcode(reg, OP_MEMORY_END_PUSH);
01334 else
01335 r = add_opcode(reg, OP_MEMORY_END);
01336 if (r) return r;
01337 r = add_mem_num(reg, node->regnum);
01338 }
01339 break;
01340
01341 case ENCLOSE_STOP_BACKTRACK:
01342 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01343 QtfrNode* qn = NQTFR(node->target);
01344 r = compile_tree_n_times(qn->target, qn->lower, reg);
01345 if (r) return r;
01346
01347 len = compile_length_tree(qn->target, reg);
01348 if (len < 0) return len;
01349
01350 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01351 if (r) return r;
01352 r = compile_tree(qn->target, reg);
01353 if (r) return r;
01354 r = add_opcode(reg, OP_POP);
01355 if (r) return r;
01356 r = add_opcode_rel_addr(reg, OP_JUMP,
01357 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01358 }
01359 else {
01360 r = add_opcode(reg, OP_PUSH_STOP_BT);
01361 if (r) return r;
01362 r = compile_tree(node->target, reg);
01363 if (r) return r;
01364 r = add_opcode(reg, OP_POP_STOP_BT);
01365 }
01366 break;
01367
01368 default:
01369 return ONIGERR_TYPE_BUG;
01370 break;
01371 }
01372
01373 return r;
01374 }
01375
01376 static int
01377 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01378 {
01379 int len;
01380 int tlen = 0;
01381
01382 if (node->target) {
01383 tlen = compile_length_tree(node->target, reg);
01384 if (tlen < 0) return tlen;
01385 }
01386
01387 switch (node->type) {
01388 case ANCHOR_PREC_READ:
01389 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01390 break;
01391 case ANCHOR_PREC_READ_NOT:
01392 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01393 break;
01394 case ANCHOR_LOOK_BEHIND:
01395 len = SIZE_OP_LOOK_BEHIND + tlen;
01396 break;
01397 case ANCHOR_LOOK_BEHIND_NOT:
01398 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01399 break;
01400
01401 default:
01402 len = SIZE_OPCODE;
01403 break;
01404 }
01405
01406 return len;
01407 }
01408
01409 static int
01410 compile_anchor_node(AnchorNode* node, regex_t* reg)
01411 {
01412 int r, len;
01413
01414 switch (node->type) {
01415 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
01416 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
01417 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
01418 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
01419 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
01420 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01421
01422 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
01423 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01424 #ifdef USE_WORD_BEGIN_END
01425 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
01426 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
01427 #endif
01428
01429 case ANCHOR_PREC_READ:
01430 r = add_opcode(reg, OP_PUSH_POS);
01431 if (r) return r;
01432 r = compile_tree(node->target, reg);
01433 if (r) return r;
01434 r = add_opcode(reg, OP_POP_POS);
01435 break;
01436
01437 case ANCHOR_PREC_READ_NOT:
01438 len = compile_length_tree(node->target, reg);
01439 if (len < 0) return len;
01440 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01441 if (r) return r;
01442 r = compile_tree(node->target, reg);
01443 if (r) return r;
01444 r = add_opcode(reg, OP_FAIL_POS);
01445 break;
01446
01447 case ANCHOR_LOOK_BEHIND:
01448 {
01449 int n;
01450 r = add_opcode(reg, OP_LOOK_BEHIND);
01451 if (r) return r;
01452 if (node->char_len < 0) {
01453 r = get_char_length_tree(node->target, reg, &n);
01454 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01455 }
01456 else
01457 n = node->char_len;
01458 r = add_length(reg, n);
01459 if (r) return r;
01460 r = compile_tree(node->target, reg);
01461 }
01462 break;
01463
01464 case ANCHOR_LOOK_BEHIND_NOT:
01465 {
01466 int n;
01467 len = compile_length_tree(node->target, reg);
01468 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01469 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01470 if (r) return r;
01471 if (node->char_len < 0) {
01472 r = get_char_length_tree(node->target, reg, &n);
01473 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01474 }
01475 else
01476 n = node->char_len;
01477 r = add_length(reg, n);
01478 if (r) return r;
01479 r = compile_tree(node->target, reg);
01480 if (r) return r;
01481 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01482 }
01483 break;
01484
01485 default:
01486 return ONIGERR_TYPE_BUG;
01487 break;
01488 }
01489
01490 return r;
01491 }
01492
01493 static int
01494 compile_length_tree(Node* node, regex_t* reg)
01495 {
01496 int len, type, r;
01497
01498 type = NTYPE(node);
01499 switch (type) {
01500 case NT_LIST:
01501 len = 0;
01502 do {
01503 r = compile_length_tree(NCAR(node), reg);
01504 if (r < 0) return r;
01505 len += r;
01506 } while (IS_NOT_NULL(node = NCDR(node)));
01507 r = len;
01508 break;
01509
01510 case NT_ALT:
01511 {
01512 int n;
01513
01514 n = r = 0;
01515 do {
01516 r += compile_length_tree(NCAR(node), reg);
01517 n++;
01518 } while (IS_NOT_NULL(node = NCDR(node)));
01519 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01520 }
01521 break;
01522
01523 case NT_STR:
01524 if (NSTRING_IS_RAW(node))
01525 r = compile_length_string_raw_node(NSTR(node), reg);
01526 else
01527 r = compile_length_string_node(node, reg);
01528 break;
01529
01530 case NT_CCLASS:
01531 r = compile_length_cclass_node(NCCLASS(node), reg);
01532 break;
01533
01534 case NT_CTYPE:
01535 case NT_CANY:
01536 r = SIZE_OPCODE;
01537 break;
01538
01539 case NT_BREF:
01540 {
01541 BRefNode* br = NBREF(node);
01542
01543 #ifdef USE_BACKREF_WITH_LEVEL
01544 if (IS_BACKREF_NEST_LEVEL(br)) {
01545 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01546 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01547 }
01548 else
01549 #endif
01550 if (br->back_num == 1) {
01551 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01552 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01553 }
01554 else {
01555 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01556 }
01557 }
01558 break;
01559
01560 #ifdef USE_SUBEXP_CALL
01561 case NT_CALL:
01562 r = SIZE_OP_CALL;
01563 break;
01564 #endif
01565
01566 case NT_QTFR:
01567 r = compile_length_quantifier_node(NQTFR(node), reg);
01568 break;
01569
01570 case NT_ENCLOSE:
01571 r = compile_length_enclose_node(NENCLOSE(node), reg);
01572 break;
01573
01574 case NT_ANCHOR:
01575 r = compile_length_anchor_node(NANCHOR(node), reg);
01576 break;
01577
01578 default:
01579 return ONIGERR_TYPE_BUG;
01580 break;
01581 }
01582
01583 return r;
01584 }
01585
01586 static int
01587 compile_tree(Node* node, regex_t* reg)
01588 {
01589 int n, type, len, pos, r = 0;
01590
01591 type = NTYPE(node);
01592 switch (type) {
01593 case NT_LIST:
01594 do {
01595 r = compile_tree(NCAR(node), reg);
01596 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01597 break;
01598
01599 case NT_ALT:
01600 {
01601 Node* x = node;
01602 len = 0;
01603 do {
01604 len += compile_length_tree(NCAR(x), reg);
01605 if (NCDR(x) != NULL) {
01606 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01607 }
01608 } while (IS_NOT_NULL(x = NCDR(x)));
01609 pos = reg->used + len;
01610
01611 do {
01612 len = compile_length_tree(NCAR(node), reg);
01613 if (IS_NOT_NULL(NCDR(node))) {
01614 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01615 if (r) break;
01616 }
01617 r = compile_tree(NCAR(node), reg);
01618 if (r) break;
01619 if (IS_NOT_NULL(NCDR(node))) {
01620 len = pos - (reg->used + SIZE_OP_JUMP);
01621 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01622 if (r) break;
01623 }
01624 } while (IS_NOT_NULL(node = NCDR(node)));
01625 }
01626 break;
01627
01628 case NT_STR:
01629 if (NSTRING_IS_RAW(node))
01630 r = compile_string_raw_node(NSTR(node), reg);
01631 else
01632 r = compile_string_node(node, reg);
01633 break;
01634
01635 case NT_CCLASS:
01636 r = compile_cclass_node(NCCLASS(node), reg);
01637 break;
01638
01639 case NT_CTYPE:
01640 {
01641 int op;
01642
01643 switch (NCTYPE(node)->ctype) {
01644 case ONIGENC_CTYPE_WORD:
01645 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
01646 else op = OP_WORD;
01647 break;
01648 default:
01649 return ONIGERR_TYPE_BUG;
01650 break;
01651 }
01652 r = add_opcode(reg, op);
01653 }
01654 break;
01655
01656 case NT_CANY:
01657 if (IS_MULTILINE(reg->options))
01658 r = add_opcode(reg, OP_ANYCHAR_ML);
01659 else
01660 r = add_opcode(reg, OP_ANYCHAR);
01661 break;
01662
01663 case NT_BREF:
01664 {
01665 BRefNode* br = NBREF(node);
01666
01667 #ifdef USE_BACKREF_WITH_LEVEL
01668 if (IS_BACKREF_NEST_LEVEL(br)) {
01669 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01670 if (r) return r;
01671 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01672 if (r) return r;
01673 r = add_length(reg, br->nest_level);
01674 if (r) return r;
01675
01676 goto add_bacref_mems;
01677 }
01678 else
01679 #endif
01680 if (br->back_num == 1) {
01681 n = br->back_static[0];
01682 if (IS_IGNORECASE(reg->options)) {
01683 r = add_opcode(reg, OP_BACKREFN_IC);
01684 if (r) return r;
01685 r = add_mem_num(reg, n);
01686 }
01687 else {
01688 switch (n) {
01689 case 1: r = add_opcode(reg, OP_BACKREF1); break;
01690 case 2: r = add_opcode(reg, OP_BACKREF2); break;
01691 default:
01692 r = add_opcode(reg, OP_BACKREFN);
01693 if (r) return r;
01694 r = add_mem_num(reg, n);
01695 break;
01696 }
01697 }
01698 }
01699 else {
01700 int i;
01701 int* p;
01702
01703 if (IS_IGNORECASE(reg->options)) {
01704 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01705 }
01706 else {
01707 r = add_opcode(reg, OP_BACKREF_MULTI);
01708 }
01709 if (r) return r;
01710
01711 #ifdef USE_BACKREF_WITH_LEVEL
01712 add_bacref_mems:
01713 #endif
01714 r = add_length(reg, br->back_num);
01715 if (r) return r;
01716 p = BACKREFS_P(br);
01717 for (i = br->back_num - 1; i >= 0; i--) {
01718 r = add_mem_num(reg, p[i]);
01719 if (r) return r;
01720 }
01721 }
01722 }
01723 break;
01724
01725 #ifdef USE_SUBEXP_CALL
01726 case NT_CALL:
01727 r = compile_call(NCALL(node), reg);
01728 break;
01729 #endif
01730
01731 case NT_QTFR:
01732 r = compile_quantifier_node(NQTFR(node), reg);
01733 break;
01734
01735 case NT_ENCLOSE:
01736 r = compile_enclose_node(NENCLOSE(node), reg);
01737 break;
01738
01739 case NT_ANCHOR:
01740 r = compile_anchor_node(NANCHOR(node), reg);
01741 break;
01742
01743 default:
01744 #ifdef ONIG_DEBUG
01745 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01746 #endif
01747 break;
01748 }
01749
01750 return r;
01751 }
01752
01753 #ifdef USE_NAMED_GROUP
01754
01755 static int
01756 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01757 {
01758 int r = 0;
01759 Node* node = *plink;
01760
01761 switch (NTYPE(node)) {
01762 case NT_LIST:
01763 case NT_ALT:
01764 do {
01765 r = noname_disable_map(&(NCAR(node)), map, counter);
01766 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01767 break;
01768
01769 case NT_QTFR:
01770 {
01771 Node** ptarget = &(NQTFR(node)->target);
01772 Node* old = *ptarget;
01773 r = noname_disable_map(ptarget, map, counter);
01774 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01775 onig_reduce_nested_quantifier(node, *ptarget);
01776 }
01777 }
01778 break;
01779
01780 case NT_ENCLOSE:
01781 {
01782 EncloseNode* en = NENCLOSE(node);
01783 if (en->type == ENCLOSE_MEMORY) {
01784 if (IS_ENCLOSE_NAMED_GROUP(en)) {
01785 (*counter)++;
01786 map[en->regnum].new_val = *counter;
01787 en->regnum = *counter;
01788 r = noname_disable_map(&(en->target), map, counter);
01789 }
01790 else {
01791 *plink = en->target;
01792 en->target = NULL_NODE;
01793 onig_node_free(node);
01794 r = noname_disable_map(plink, map, counter);
01795 }
01796 }
01797 else
01798 r = noname_disable_map(&(en->target), map, counter);
01799 }
01800 break;
01801
01802 case NT_ANCHOR:
01803 {
01804 AnchorNode* an = NANCHOR(node);
01805 switch (an->type) {
01806 case ANCHOR_PREC_READ:
01807 case ANCHOR_PREC_READ_NOT:
01808 case ANCHOR_LOOK_BEHIND:
01809 case ANCHOR_LOOK_BEHIND_NOT:
01810 r = noname_disable_map(&(an->target), map, counter);
01811 break;
01812 }
01813 }
01814 break;
01815
01816 default:
01817 break;
01818 }
01819
01820 return r;
01821 }
01822
01823 static int
01824 renumber_node_backref(Node* node, GroupNumRemap* map)
01825 {
01826 int i, pos, n, old_num;
01827 int *backs;
01828 BRefNode* bn = NBREF(node);
01829
01830 if (! IS_BACKREF_NAME_REF(bn))
01831 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01832
01833 old_num = bn->back_num;
01834 if (IS_NULL(bn->back_dynamic))
01835 backs = bn->back_static;
01836 else
01837 backs = bn->back_dynamic;
01838
01839 for (i = 0, pos = 0; i < old_num; i++) {
01840 n = map[backs[i]].new_val;
01841 if (n > 0) {
01842 backs[pos] = n;
01843 pos++;
01844 }
01845 }
01846
01847 bn->back_num = pos;
01848 return 0;
01849 }
01850
01851 static int
01852 renumber_by_map(Node* node, GroupNumRemap* map)
01853 {
01854 int r = 0;
01855
01856 switch (NTYPE(node)) {
01857 case NT_LIST:
01858 case NT_ALT:
01859 do {
01860 r = renumber_by_map(NCAR(node), map);
01861 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01862 break;
01863 case NT_QTFR:
01864 r = renumber_by_map(NQTFR(node)->target, map);
01865 break;
01866 case NT_ENCLOSE:
01867 r = renumber_by_map(NENCLOSE(node)->target, map);
01868 break;
01869
01870 case NT_BREF:
01871 r = renumber_node_backref(node, map);
01872 break;
01873
01874 case NT_ANCHOR:
01875 {
01876 AnchorNode* an = NANCHOR(node);
01877 switch (an->type) {
01878 case ANCHOR_PREC_READ:
01879 case ANCHOR_PREC_READ_NOT:
01880 case ANCHOR_LOOK_BEHIND:
01881 case ANCHOR_LOOK_BEHIND_NOT:
01882 r = renumber_by_map(an->target, map);
01883 break;
01884 }
01885 }
01886 break;
01887
01888 default:
01889 break;
01890 }
01891
01892 return r;
01893 }
01894
01895 static int
01896 numbered_ref_check(Node* node)
01897 {
01898 int r = 0;
01899
01900 switch (NTYPE(node)) {
01901 case NT_LIST:
01902 case NT_ALT:
01903 do {
01904 r = numbered_ref_check(NCAR(node));
01905 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01906 break;
01907 case NT_QTFR:
01908 r = numbered_ref_check(NQTFR(node)->target);
01909 break;
01910 case NT_ENCLOSE:
01911 r = numbered_ref_check(NENCLOSE(node)->target);
01912 break;
01913
01914 case NT_BREF:
01915 if (! IS_BACKREF_NAME_REF(NBREF(node)))
01916 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01917 break;
01918
01919 default:
01920 break;
01921 }
01922
01923 return r;
01924 }
01925
01926 static int
01927 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01928 {
01929 int r, i, pos, counter;
01930 BitStatusType loc;
01931 GroupNumRemap* map;
01932
01933 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01934 CHECK_NULL_RETURN_MEMERR(map);
01935 for (i = 1; i <= env->num_mem; i++) {
01936 map[i].new_val = 0;
01937 }
01938 counter = 0;
01939 r = noname_disable_map(root, map, &counter);
01940 if (r != 0) return r;
01941
01942 r = renumber_by_map(*root, map);
01943 if (r != 0) return r;
01944
01945 for (i = 1, pos = 1; i <= env->num_mem; i++) {
01946 if (map[i].new_val > 0) {
01947 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01948 pos++;
01949 }
01950 }
01951
01952 loc = env->capture_history;
01953 BIT_STATUS_CLEAR(env->capture_history);
01954 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01955 if (BIT_STATUS_AT(loc, i)) {
01956 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01957 }
01958 }
01959
01960 env->num_mem = env->num_named;
01961 reg->num_mem = env->num_named;
01962
01963 return onig_renumber_name_table(reg, map);
01964 }
01965 #endif
01966
01967 #ifdef USE_SUBEXP_CALL
01968 static int
01969 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01970 {
01971 int i, offset;
01972 EncloseNode* en;
01973 AbsAddrType addr;
01974
01975 for (i = 0; i < uslist->num; i++) {
01976 en = NENCLOSE(uslist->us[i].target);
01977 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01978 addr = en->call_addr;
01979 offset = uslist->us[i].offset;
01980
01981 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01982 }
01983 return 0;
01984 }
01985 #endif
01986
01987 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
01988 static int
01989 quantifiers_memory_node_info(Node* node)
01990 {
01991 int r = 0;
01992
01993 switch (NTYPE(node)) {
01994 case NT_LIST:
01995 case NT_ALT:
01996 {
01997 int v;
01998 do {
01999 v = quantifiers_memory_node_info(NCAR(node));
02000 if (v > r) r = v;
02001 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02002 }
02003 break;
02004
02005 #ifdef USE_SUBEXP_CALL
02006 case NT_CALL:
02007 if (IS_CALL_RECURSION(NCALL(node))) {
02008 return NQ_TARGET_IS_EMPTY_REC;
02009 }
02010 else
02011 r = quantifiers_memory_node_info(NCALL(node)->target);
02012 break;
02013 #endif
02014
02015 case NT_QTFR:
02016 {
02017 QtfrNode* qn = NQTFR(node);
02018 if (qn->upper != 0) {
02019 r = quantifiers_memory_node_info(qn->target);
02020 }
02021 }
02022 break;
02023
02024 case NT_ENCLOSE:
02025 {
02026 EncloseNode* en = NENCLOSE(node);
02027 switch (en->type) {
02028 case ENCLOSE_MEMORY:
02029 return NQ_TARGET_IS_EMPTY_MEM;
02030 break;
02031
02032 case ENCLOSE_OPTION:
02033 case ENCLOSE_STOP_BACKTRACK:
02034 r = quantifiers_memory_node_info(en->target);
02035 break;
02036 default:
02037 break;
02038 }
02039 }
02040 break;
02041
02042 case NT_BREF:
02043 case NT_STR:
02044 case NT_CTYPE:
02045 case NT_CCLASS:
02046 case NT_CANY:
02047 case NT_ANCHOR:
02048 default:
02049 break;
02050 }
02051
02052 return r;
02053 }
02054 #endif
02055
02056 static int
02057 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02058 {
02059 OnigDistance tmin;
02060 int r = 0;
02061
02062 *min = 0;
02063 switch (NTYPE(node)) {
02064 case NT_BREF:
02065 {
02066 int i;
02067 int* backs;
02068 Node** nodes = SCANENV_MEM_NODES(env);
02069 BRefNode* br = NBREF(node);
02070 if (br->state & NST_RECURSION) break;
02071
02072 backs = BACKREFS_P(br);
02073 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02074 r = get_min_match_length(nodes[backs[0]], min, env);
02075 if (r != 0) break;
02076 for (i = 1; i < br->back_num; i++) {
02077 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02078 r = get_min_match_length(nodes[backs[i]], &tmin, env);
02079 if (r != 0) break;
02080 if (*min > tmin) *min = tmin;
02081 }
02082 }
02083 break;
02084
02085 #ifdef USE_SUBEXP_CALL
02086 case NT_CALL:
02087 if (IS_CALL_RECURSION(NCALL(node))) {
02088 EncloseNode* en = NENCLOSE(NCALL(node)->target);
02089 if (IS_ENCLOSE_MIN_FIXED(en))
02090 *min = en->min_len;
02091 }
02092 else
02093 r = get_min_match_length(NCALL(node)->target, min, env);
02094 break;
02095 #endif
02096
02097 case NT_LIST:
02098 do {
02099 r = get_min_match_length(NCAR(node), &tmin, env);
02100 if (r == 0) *min += tmin;
02101 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02102 break;
02103
02104 case NT_ALT:
02105 {
02106 Node *x, *y;
02107 y = node;
02108 do {
02109 x = NCAR(y);
02110 r = get_min_match_length(x, &tmin, env);
02111 if (r != 0) break;
02112 if (y == node) *min = tmin;
02113 else if (*min > tmin) *min = tmin;
02114 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02115 }
02116 break;
02117
02118 case NT_STR:
02119 {
02120 StrNode* sn = NSTR(node);
02121 *min = sn->end - sn->s;
02122 }
02123 break;
02124
02125 case NT_CTYPE:
02126 *min = 1;
02127 break;
02128
02129 case NT_CCLASS:
02130 case NT_CANY:
02131 *min = 1;
02132 break;
02133
02134 case NT_QTFR:
02135 {
02136 QtfrNode* qn = NQTFR(node);
02137
02138 if (qn->lower > 0) {
02139 r = get_min_match_length(qn->target, min, env);
02140 if (r == 0)
02141 *min = distance_multiply(*min, qn->lower);
02142 }
02143 }
02144 break;
02145
02146 case NT_ENCLOSE:
02147 {
02148 EncloseNode* en = NENCLOSE(node);
02149 switch (en->type) {
02150 case ENCLOSE_MEMORY:
02151 #ifdef USE_SUBEXP_CALL
02152 if (IS_ENCLOSE_MIN_FIXED(en))
02153 *min = en->min_len;
02154 else {
02155 r = get_min_match_length(en->target, min, env);
02156 if (r == 0) {
02157 en->min_len = *min;
02158 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02159 }
02160 }
02161 break;
02162 #endif
02163 case ENCLOSE_OPTION:
02164 case ENCLOSE_STOP_BACKTRACK:
02165 r = get_min_match_length(en->target, min, env);
02166 break;
02167 }
02168 }
02169 break;
02170
02171 case NT_ANCHOR:
02172 default:
02173 break;
02174 }
02175
02176 return r;
02177 }
02178
02179 static int
02180 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02181 {
02182 OnigDistance tmax;
02183 int r = 0;
02184
02185 *max = 0;
02186 switch (NTYPE(node)) {
02187 case NT_LIST:
02188 do {
02189 r = get_max_match_length(NCAR(node), &tmax, env);
02190 if (r == 0)
02191 *max = distance_add(*max, tmax);
02192 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02193 break;
02194
02195 case NT_ALT:
02196 do {
02197 r = get_max_match_length(NCAR(node), &tmax, env);
02198 if (r == 0 && *max < tmax) *max = tmax;
02199 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02200 break;
02201
02202 case NT_STR:
02203 {
02204 StrNode* sn = NSTR(node);
02205 *max = sn->end - sn->s;
02206 }
02207 break;
02208
02209 case NT_CTYPE:
02210 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02211 break;
02212
02213 case NT_CCLASS:
02214 case NT_CANY:
02215 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02216 break;
02217
02218 case NT_BREF:
02219 {
02220 int i;
02221 int* backs;
02222 Node** nodes = SCANENV_MEM_NODES(env);
02223 BRefNode* br = NBREF(node);
02224 if (br->state & NST_RECURSION) {
02225 *max = ONIG_INFINITE_DISTANCE;
02226 break;
02227 }
02228 backs = BACKREFS_P(br);
02229 for (i = 0; i < br->back_num; i++) {
02230 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02231 r = get_max_match_length(nodes[backs[i]], &tmax, env);
02232 if (r != 0) break;
02233 if (*max < tmax) *max = tmax;
02234 }
02235 }
02236 break;
02237
02238 #ifdef USE_SUBEXP_CALL
02239 case NT_CALL:
02240 if (! IS_CALL_RECURSION(NCALL(node)))
02241 r = get_max_match_length(NCALL(node)->target, max, env);
02242 else
02243 *max = ONIG_INFINITE_DISTANCE;
02244 break;
02245 #endif
02246
02247 case NT_QTFR:
02248 {
02249 QtfrNode* qn = NQTFR(node);
02250
02251 if (qn->upper != 0) {
02252 r = get_max_match_length(qn->target, max, env);
02253 if (r == 0 && *max != 0) {
02254 if (! IS_REPEAT_INFINITE(qn->upper))
02255 *max = distance_multiply(*max, qn->upper);
02256 else
02257 *max = ONIG_INFINITE_DISTANCE;
02258 }
02259 }
02260 }
02261 break;
02262
02263 case NT_ENCLOSE:
02264 {
02265 EncloseNode* en = NENCLOSE(node);
02266 switch (en->type) {
02267 case ENCLOSE_MEMORY:
02268 #ifdef USE_SUBEXP_CALL
02269 if (IS_ENCLOSE_MAX_FIXED(en))
02270 *max = en->max_len;
02271 else {
02272 r = get_max_match_length(en->target, max, env);
02273 if (r == 0) {
02274 en->max_len = *max;
02275 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02276 }
02277 }
02278 break;
02279 #endif
02280 case ENCLOSE_OPTION:
02281 case ENCLOSE_STOP_BACKTRACK:
02282 r = get_max_match_length(en->target, max, env);
02283 break;
02284 }
02285 }
02286 break;
02287
02288 case NT_ANCHOR:
02289 default:
02290 break;
02291 }
02292
02293 return r;
02294 }
02295
02296 #define GET_CHAR_LEN_VARLEN -1
02297 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
02298
02299
02300 static int
02301 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02302 {
02303 int tlen;
02304 int r = 0;
02305
02306 level++;
02307 *len = 0;
02308 switch (NTYPE(node)) {
02309 case NT_LIST:
02310 do {
02311 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02312 if (r == 0)
02313 *len = (int)distance_add(*len, tlen);
02314 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02315 break;
02316
02317 case NT_ALT:
02318 {
02319 int tlen2;
02320 int varlen = 0;
02321
02322 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02323 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02324 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02325 if (r == 0) {
02326 if (tlen != tlen2)
02327 varlen = 1;
02328 }
02329 }
02330 if (r == 0) {
02331 if (varlen != 0) {
02332 if (level == 1)
02333 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02334 else
02335 r = GET_CHAR_LEN_VARLEN;
02336 }
02337 else
02338 *len = tlen;
02339 }
02340 }
02341 break;
02342
02343 case NT_STR:
02344 {
02345 StrNode* sn = NSTR(node);
02346 UChar *s = sn->s;
02347 while (s < sn->end) {
02348 s += enclen(reg->enc, s, sn->end);
02349 (*len)++;
02350 }
02351 }
02352 break;
02353
02354 case NT_QTFR:
02355 {
02356 QtfrNode* qn = NQTFR(node);
02357 if (qn->lower == qn->upper) {
02358 r = get_char_length_tree1(qn->target, reg, &tlen, level);
02359 if (r == 0)
02360 *len = (int)distance_multiply(tlen, qn->lower);
02361 }
02362 else
02363 r = GET_CHAR_LEN_VARLEN;
02364 }
02365 break;
02366
02367 #ifdef USE_SUBEXP_CALL
02368 case NT_CALL:
02369 if (! IS_CALL_RECURSION(NCALL(node)))
02370 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02371 else
02372 r = GET_CHAR_LEN_VARLEN;
02373 break;
02374 #endif
02375
02376 case NT_CTYPE:
02377 *len = 1;
02378 break;
02379
02380 case NT_CCLASS:
02381 case NT_CANY:
02382 *len = 1;
02383 break;
02384
02385 case NT_ENCLOSE:
02386 {
02387 EncloseNode* en = NENCLOSE(node);
02388 switch (en->type) {
02389 case ENCLOSE_MEMORY:
02390 #ifdef USE_SUBEXP_CALL
02391 if (IS_ENCLOSE_CLEN_FIXED(en))
02392 *len = en->char_len;
02393 else {
02394 r = get_char_length_tree1(en->target, reg, len, level);
02395 if (r == 0) {
02396 en->char_len = *len;
02397 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02398 }
02399 }
02400 break;
02401 #endif
02402 case ENCLOSE_OPTION:
02403 case ENCLOSE_STOP_BACKTRACK:
02404 r = get_char_length_tree1(en->target, reg, len, level);
02405 break;
02406 default:
02407 break;
02408 }
02409 }
02410 break;
02411
02412 case NT_ANCHOR:
02413 break;
02414
02415 default:
02416 r = GET_CHAR_LEN_VARLEN;
02417 break;
02418 }
02419
02420 return r;
02421 }
02422
02423 static int
02424 get_char_length_tree(Node* node, regex_t* reg, int* len)
02425 {
02426 return get_char_length_tree1(node, reg, len, 0);
02427 }
02428
02429
02430 static int
02431 is_not_included(Node* x, Node* y, regex_t* reg)
02432 {
02433 int i;
02434 OnigDistance len;
02435 OnigCodePoint code;
02436 UChar *p, c;
02437 int ytype;
02438
02439 retry:
02440 ytype = NTYPE(y);
02441 switch (NTYPE(x)) {
02442 case NT_CTYPE:
02443 {
02444 switch (ytype) {
02445 case NT_CTYPE:
02446 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02447 NCTYPE(y)->not != NCTYPE(x)->not)
02448 return 1;
02449 else
02450 return 0;
02451 break;
02452
02453 case NT_CCLASS:
02454 swap:
02455 {
02456 Node* tmp;
02457 tmp = x; x = y; y = tmp;
02458 goto retry;
02459 }
02460 break;
02461
02462 case NT_STR:
02463 goto swap;
02464 break;
02465
02466 default:
02467 break;
02468 }
02469 }
02470 break;
02471
02472 case NT_CCLASS:
02473 {
02474 CClassNode* xc = NCCLASS(x);
02475 switch (ytype) {
02476 case NT_CTYPE:
02477 switch (NCTYPE(y)->ctype) {
02478 case ONIGENC_CTYPE_WORD:
02479 if (NCTYPE(y)->not == 0) {
02480 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02481 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02482 if (BITSET_AT(xc->bs, i)) {
02483 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02484 }
02485 }
02486 return 1;
02487 }
02488 return 0;
02489 }
02490 else {
02491 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02492 if (! IS_CODE_SB_WORD(reg->enc, i)) {
02493 if (!IS_NCCLASS_NOT(xc)) {
02494 if (BITSET_AT(xc->bs, i))
02495 return 0;
02496 }
02497 else {
02498 if (! BITSET_AT(xc->bs, i))
02499 return 0;
02500 }
02501 }
02502 }
02503 return 1;
02504 }
02505 break;
02506
02507 default:
02508 break;
02509 }
02510 break;
02511
02512 case NT_CCLASS:
02513 {
02514 int v;
02515 CClassNode* yc = NCCLASS(y);
02516
02517 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02518 v = BITSET_AT(xc->bs, i);
02519 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02520 (v == 0 && IS_NCCLASS_NOT(xc))) {
02521 v = BITSET_AT(yc->bs, i);
02522 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02523 (v == 0 && IS_NCCLASS_NOT(yc)))
02524 return 0;
02525 }
02526 }
02527 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02528 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02529 return 1;
02530 return 0;
02531 }
02532 break;
02533
02534 case NT_STR:
02535 goto swap;
02536 break;
02537
02538 default:
02539 break;
02540 }
02541 }
02542 break;
02543
02544 case NT_STR:
02545 {
02546 StrNode* xs = NSTR(x);
02547 if (NSTRING_LEN(x) == 0)
02548 break;
02549
02550 c = *(xs->s);
02551 switch (ytype) {
02552 case NT_CTYPE:
02553 switch (NCTYPE(y)->ctype) {
02554 case ONIGENC_CTYPE_WORD:
02555 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02556 return NCTYPE(y)->not;
02557 else
02558 return !(NCTYPE(y)->not);
02559 break;
02560 default:
02561 break;
02562 }
02563 break;
02564
02565 case NT_CCLASS:
02566 {
02567 CClassNode* cc = NCCLASS(y);
02568
02569 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02570 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02571 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02572 }
02573 break;
02574
02575 case NT_STR:
02576 {
02577 UChar *q;
02578 StrNode* ys = NSTR(y);
02579 len = NSTRING_LEN(x);
02580 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02581 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02582
02583 return 0;
02584 }
02585 else {
02586 for (i = 0, p = ys->s, q = xs->s; (OnigDistance)i < len; i++, p++, q++) {
02587 if (*p != *q) return 1;
02588 }
02589 }
02590 }
02591 break;
02592
02593 default:
02594 break;
02595 }
02596 }
02597 break;
02598
02599 default:
02600 break;
02601 }
02602
02603 return 0;
02604 }
02605
02606 static Node*
02607 get_head_value_node(Node* node, int exact, regex_t* reg)
02608 {
02609 Node* n = NULL_NODE;
02610
02611 switch (NTYPE(node)) {
02612 case NT_BREF:
02613 case NT_ALT:
02614 case NT_CANY:
02615 #ifdef USE_SUBEXP_CALL
02616 case NT_CALL:
02617 #endif
02618 break;
02619
02620 case NT_CTYPE:
02621 case NT_CCLASS:
02622 if (exact == 0) {
02623 n = node;
02624 }
02625 break;
02626
02627 case NT_LIST:
02628 n = get_head_value_node(NCAR(node), exact, reg);
02629 break;
02630
02631 case NT_STR:
02632 {
02633 StrNode* sn = NSTR(node);
02634
02635 if (sn->end <= sn->s)
02636 break;
02637
02638 if (exact != 0 &&
02639 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02640 }
02641 else {
02642 n = node;
02643 }
02644 }
02645 break;
02646
02647 case NT_QTFR:
02648 {
02649 QtfrNode* qn = NQTFR(node);
02650 if (qn->lower > 0) {
02651 if (IS_NOT_NULL(qn->head_exact))
02652 n = qn->head_exact;
02653 else
02654 n = get_head_value_node(qn->target, exact, reg);
02655 }
02656 }
02657 break;
02658
02659 case NT_ENCLOSE:
02660 {
02661 EncloseNode* en = NENCLOSE(node);
02662 switch (en->type) {
02663 case ENCLOSE_OPTION:
02664 {
02665 OnigOptionType options = reg->options;
02666
02667 reg->options = NENCLOSE(node)->option;
02668 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02669 reg->options = options;
02670 }
02671 break;
02672
02673 case ENCLOSE_MEMORY:
02674 case ENCLOSE_STOP_BACKTRACK:
02675 n = get_head_value_node(en->target, exact, reg);
02676 break;
02677 }
02678 }
02679 break;
02680
02681 case NT_ANCHOR:
02682 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02683 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02684 break;
02685
02686 default:
02687 break;
02688 }
02689
02690 return n;
02691 }
02692
02693 static int
02694 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02695 {
02696 int type, r = 0;
02697
02698 type = NTYPE(node);
02699 if ((NTYPE2BIT(type) & type_mask) == 0)
02700 return 1;
02701
02702 switch (type) {
02703 case NT_LIST:
02704 case NT_ALT:
02705 do {
02706 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02707 anchor_mask);
02708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02709 break;
02710
02711 case NT_QTFR:
02712 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02713 anchor_mask);
02714 break;
02715
02716 case NT_ENCLOSE:
02717 {
02718 EncloseNode* en = NENCLOSE(node);
02719 if ((en->type & enclose_mask) == 0)
02720 return 1;
02721
02722 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02723 }
02724 break;
02725
02726 case NT_ANCHOR:
02727 type = NANCHOR(node)->type;
02728 if ((type & anchor_mask) == 0)
02729 return 1;
02730
02731 if (NANCHOR(node)->target)
02732 r = check_type_tree(NANCHOR(node)->target,
02733 type_mask, enclose_mask, anchor_mask);
02734 break;
02735
02736 default:
02737 break;
02738 }
02739 return r;
02740 }
02741
02742 #ifdef USE_SUBEXP_CALL
02743
02744 #define RECURSION_EXIST 1
02745 #define RECURSION_INFINITE 2
02746
02747 static int
02748 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02749 {
02750 int type;
02751 int r = 0;
02752
02753 type = NTYPE(node);
02754 switch (type) {
02755 case NT_LIST:
02756 {
02757 Node *x;
02758 OnigDistance min;
02759 int ret;
02760
02761 x = node;
02762 do {
02763 ret = subexp_inf_recursive_check(NCAR(x), env, head);
02764 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02765 r |= ret;
02766 if (head) {
02767 ret = get_min_match_length(NCAR(x), &min, env);
02768 if (ret != 0) return ret;
02769 if (min != 0) head = 0;
02770 }
02771 } while (IS_NOT_NULL(x = NCDR(x)));
02772 }
02773 break;
02774
02775 case NT_ALT:
02776 {
02777 int ret;
02778 r = RECURSION_EXIST;
02779 do {
02780 ret = subexp_inf_recursive_check(NCAR(node), env, head);
02781 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02782 r &= ret;
02783 } while (IS_NOT_NULL(node = NCDR(node)));
02784 }
02785 break;
02786
02787 case NT_QTFR:
02788 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02789 if (r == RECURSION_EXIST) {
02790 if (NQTFR(node)->lower == 0) r = 0;
02791 }
02792 break;
02793
02794 case NT_ANCHOR:
02795 {
02796 AnchorNode* an = NANCHOR(node);
02797 switch (an->type) {
02798 case ANCHOR_PREC_READ:
02799 case ANCHOR_PREC_READ_NOT:
02800 case ANCHOR_LOOK_BEHIND:
02801 case ANCHOR_LOOK_BEHIND_NOT:
02802 r = subexp_inf_recursive_check(an->target, env, head);
02803 break;
02804 }
02805 }
02806 break;
02807
02808 case NT_CALL:
02809 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02810 break;
02811
02812 case NT_ENCLOSE:
02813 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02814 return 0;
02815 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02816 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02817 else {
02818 SET_ENCLOSE_STATUS(node, NST_MARK2);
02819 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02820 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02821 }
02822 break;
02823
02824 default:
02825 break;
02826 }
02827
02828 return r;
02829 }
02830
02831 static int
02832 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02833 {
02834 int type;
02835 int r = 0;
02836
02837 type = NTYPE(node);
02838 switch (type) {
02839 case NT_LIST:
02840 case NT_ALT:
02841 do {
02842 r = subexp_inf_recursive_check_trav(NCAR(node), env);
02843 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02844 break;
02845
02846 case NT_QTFR:
02847 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02848 break;
02849
02850 case NT_ANCHOR:
02851 {
02852 AnchorNode* an = NANCHOR(node);
02853 switch (an->type) {
02854 case ANCHOR_PREC_READ:
02855 case ANCHOR_PREC_READ_NOT:
02856 case ANCHOR_LOOK_BEHIND:
02857 case ANCHOR_LOOK_BEHIND_NOT:
02858 r = subexp_inf_recursive_check_trav(an->target, env);
02859 break;
02860 }
02861 }
02862 break;
02863
02864 case NT_ENCLOSE:
02865 {
02866 EncloseNode* en = NENCLOSE(node);
02867
02868 if (IS_ENCLOSE_RECURSION(en)) {
02869 SET_ENCLOSE_STATUS(node, NST_MARK1);
02870 r = subexp_inf_recursive_check(en->target, env, 1);
02871 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02872 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02873 }
02874 r = subexp_inf_recursive_check_trav(en->target, env);
02875 }
02876
02877 break;
02878
02879 default:
02880 break;
02881 }
02882
02883 return r;
02884 }
02885
02886 static int
02887 subexp_recursive_check(Node* node)
02888 {
02889 int r = 0;
02890
02891 switch (NTYPE(node)) {
02892 case NT_LIST:
02893 case NT_ALT:
02894 do {
02895 r |= subexp_recursive_check(NCAR(node));
02896 } while (IS_NOT_NULL(node = NCDR(node)));
02897 break;
02898
02899 case NT_QTFR:
02900 r = subexp_recursive_check(NQTFR(node)->target);
02901 break;
02902
02903 case NT_ANCHOR:
02904 {
02905 AnchorNode* an = NANCHOR(node);
02906 switch (an->type) {
02907 case ANCHOR_PREC_READ:
02908 case ANCHOR_PREC_READ_NOT:
02909 case ANCHOR_LOOK_BEHIND:
02910 case ANCHOR_LOOK_BEHIND_NOT:
02911 r = subexp_recursive_check(an->target);
02912 break;
02913 }
02914 }
02915 break;
02916
02917 case NT_CALL:
02918 r = subexp_recursive_check(NCALL(node)->target);
02919 if (r != 0) SET_CALL_RECURSION(node);
02920 break;
02921
02922 case NT_ENCLOSE:
02923 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02924 return 0;
02925 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02926 return 1;
02927 else {
02928 SET_ENCLOSE_STATUS(node, NST_MARK2);
02929 r = subexp_recursive_check(NENCLOSE(node)->target);
02930 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02931 }
02932 break;
02933
02934 default:
02935 break;
02936 }
02937
02938 return r;
02939 }
02940
02941
02942 static int
02943 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02944 {
02945 #define FOUND_CALLED_NODE 1
02946
02947 int type;
02948 int r = 0;
02949
02950 type = NTYPE(node);
02951 switch (type) {
02952 case NT_LIST:
02953 case NT_ALT:
02954 {
02955 int ret;
02956 do {
02957 ret = subexp_recursive_check_trav(NCAR(node), env);
02958 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02959 else if (ret < 0) return ret;
02960 } while (IS_NOT_NULL(node = NCDR(node)));
02961 }
02962 break;
02963
02964 case NT_QTFR:
02965 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
02966 if (NQTFR(node)->upper == 0) {
02967 if (r == FOUND_CALLED_NODE)
02968 NQTFR(node)->is_refered = 1;
02969 }
02970 break;
02971
02972 case NT_ANCHOR:
02973 {
02974 AnchorNode* an = NANCHOR(node);
02975 switch (an->type) {
02976 case ANCHOR_PREC_READ:
02977 case ANCHOR_PREC_READ_NOT:
02978 case ANCHOR_LOOK_BEHIND:
02979 case ANCHOR_LOOK_BEHIND_NOT:
02980 r = subexp_recursive_check_trav(an->target, env);
02981 break;
02982 }
02983 }
02984 break;
02985
02986 case NT_ENCLOSE:
02987 {
02988 EncloseNode* en = NENCLOSE(node);
02989
02990 if (! IS_ENCLOSE_RECURSION(en)) {
02991 if (IS_ENCLOSE_CALLED(en)) {
02992 SET_ENCLOSE_STATUS(node, NST_MARK1);
02993 r = subexp_recursive_check(en->target);
02994 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
02995 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02996 }
02997 }
02998 r = subexp_recursive_check_trav(en->target, env);
02999 if (IS_ENCLOSE_CALLED(en))
03000 r |= FOUND_CALLED_NODE;
03001 }
03002 break;
03003
03004 default:
03005 break;
03006 }
03007
03008 return r;
03009 }
03010
03011 static int
03012 setup_subexp_call(Node* node, ScanEnv* env)
03013 {
03014 int type;
03015 int r = 0;
03016
03017 type = NTYPE(node);
03018 switch (type) {
03019 case NT_LIST:
03020 do {
03021 r = setup_subexp_call(NCAR(node), env);
03022 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03023 break;
03024
03025 case NT_ALT:
03026 do {
03027 r = setup_subexp_call(NCAR(node), env);
03028 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03029 break;
03030
03031 case NT_QTFR:
03032 r = setup_subexp_call(NQTFR(node)->target, env);
03033 break;
03034 case NT_ENCLOSE:
03035 r = setup_subexp_call(NENCLOSE(node)->target, env);
03036 break;
03037
03038 case NT_CALL:
03039 {
03040 CallNode* cn = NCALL(node);
03041 Node** nodes = SCANENV_MEM_NODES(env);
03042
03043 if (cn->group_num != 0) {
03044 int gnum = cn->group_num;
03045
03046 #ifdef USE_NAMED_GROUP
03047 if (env->num_named > 0 &&
03048 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03049 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03050 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03051 }
03052 #endif
03053 if (gnum > env->num_mem) {
03054 onig_scan_env_set_error_string(env,
03055 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03056 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03057 }
03058
03059 #ifdef USE_NAMED_GROUP
03060 set_call_attr:
03061 #endif
03062 cn->target = nodes[cn->group_num];
03063 if (IS_NULL(cn->target)) {
03064 onig_scan_env_set_error_string(env,
03065 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03066 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03067 }
03068 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03069 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03070 cn->unset_addr_list = env->unset_addr_list;
03071 }
03072 #ifdef USE_NAMED_GROUP
03073 else {
03074 int *refs;
03075
03076 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03077 &refs);
03078 if (n <= 0) {
03079 onig_scan_env_set_error_string(env,
03080 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03081 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03082 }
03083 else if (n > 1) {
03084 onig_scan_env_set_error_string(env,
03085 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03086 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03087 }
03088 else {
03089 cn->group_num = refs[0];
03090 goto set_call_attr;
03091 }
03092 }
03093 #endif
03094 }
03095 break;
03096
03097 case NT_ANCHOR:
03098 {
03099 AnchorNode* an = NANCHOR(node);
03100
03101 switch (an->type) {
03102 case ANCHOR_PREC_READ:
03103 case ANCHOR_PREC_READ_NOT:
03104 case ANCHOR_LOOK_BEHIND:
03105 case ANCHOR_LOOK_BEHIND_NOT:
03106 r = setup_subexp_call(an->target, env);
03107 break;
03108 }
03109 }
03110 break;
03111
03112 default:
03113 break;
03114 }
03115
03116 return r;
03117 }
03118 #endif
03119
03120
03121
03122
03123
03124 static int
03125 divide_look_behind_alternatives(Node* node)
03126 {
03127 Node *head, *np, *insert_node;
03128 AnchorNode* an = NANCHOR(node);
03129 int anc_type = an->type;
03130
03131 head = an->target;
03132 np = NCAR(head);
03133 swap_node(node, head);
03134 NCAR(node) = head;
03135 NANCHOR(head)->target = np;
03136
03137 np = node;
03138 while ((np = NCDR(np)) != NULL_NODE) {
03139 insert_node = onig_node_new_anchor(anc_type);
03140 CHECK_NULL_RETURN_MEMERR(insert_node);
03141 NANCHOR(insert_node)->target = NCAR(np);
03142 NCAR(np) = insert_node;
03143 }
03144
03145 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03146 np = node;
03147 do {
03148 SET_NTYPE(np, NT_LIST);
03149 } while ((np = NCDR(np)) != NULL_NODE);
03150 }
03151 return 0;
03152 }
03153
03154 static int
03155 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03156 {
03157 int r, len;
03158 AnchorNode* an = NANCHOR(node);
03159
03160 r = get_char_length_tree(an->target, reg, &len);
03161 if (r == 0)
03162 an->char_len = len;
03163 else if (r == GET_CHAR_LEN_VARLEN)
03164 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03165 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03166 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03167 r = divide_look_behind_alternatives(node);
03168 else
03169 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03170 }
03171
03172 return r;
03173 }
03174
03175 static int
03176 next_setup(Node* node, Node* next_node, regex_t* reg)
03177 {
03178 int type;
03179
03180 retry:
03181 type = NTYPE(node);
03182 if (type == NT_QTFR) {
03183 QtfrNode* qn = NQTFR(node);
03184 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03185 #ifdef USE_QTFR_PEEK_NEXT
03186 Node* n = get_head_value_node(next_node, 1, reg);
03187
03188 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03189 qn->next_head_exact = n;
03190 }
03191 #endif
03192
03193 if (qn->lower <= 1) {
03194 int ttype = NTYPE(qn->target);
03195 if (IS_NODE_TYPE_SIMPLE(ttype)) {
03196 Node *x, *y;
03197 x = get_head_value_node(qn->target, 0, reg);
03198 if (IS_NOT_NULL(x)) {
03199 y = get_head_value_node(next_node, 0, reg);
03200 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03201 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03202 CHECK_NULL_RETURN_MEMERR(en);
03203 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03204 swap_node(node, en);
03205 NENCLOSE(node)->target = en;
03206 }
03207 }
03208 }
03209 }
03210 }
03211 }
03212 else if (type == NT_ENCLOSE) {
03213 EncloseNode* en = NENCLOSE(node);
03214 if (en->type == ENCLOSE_MEMORY) {
03215 node = en->target;
03216 goto retry;
03217 }
03218 }
03219 return 0;
03220 }
03221
03222
03223 static int
03224 update_string_node_case_fold(regex_t* reg, Node *node)
03225 {
03226 UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03227 UChar *sbuf, *ebuf, *sp;
03228 int r, i, len;
03229 OnigDistance sbuf_size;
03230 StrNode* sn = NSTR(node);
03231
03232 end = sn->end;
03233 sbuf_size = (end - sn->s) * 2;
03234 sbuf = (UChar* )xmalloc(sbuf_size);
03235 CHECK_NULL_RETURN_MEMERR(sbuf);
03236 ebuf = sbuf + sbuf_size;
03237
03238 sp = sbuf;
03239 p = sn->s;
03240 while (p < end) {
03241 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03242 q = buf;
03243 for (i = 0; i < len; i++) {
03244 if (sp >= ebuf) {
03245 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03246 CHECK_NULL_RETURN_MEMERR(sbuf);
03247 sp = sbuf + sbuf_size;
03248 sbuf_size *= 2;
03249 ebuf = sbuf + sbuf_size;
03250 }
03251
03252 *sp++ = buf[i];
03253 }
03254 }
03255
03256 r = onig_node_str_set(node, sbuf, sp);
03257 if (r != 0) {
03258 xfree(sbuf);
03259 return r;
03260 }
03261
03262 xfree(sbuf);
03263 return 0;
03264 }
03265
03266 static int
03267 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03268 regex_t* reg)
03269 {
03270 int r;
03271 Node *node;
03272
03273 node = onig_node_new_str(s, end);
03274 if (IS_NULL(node)) return ONIGERR_MEMORY;
03275
03276 r = update_string_node_case_fold(reg, node);
03277 if (r != 0) {
03278 onig_node_free(node);
03279 return r;
03280 }
03281
03282 NSTRING_SET_AMBIG(node);
03283 NSTRING_SET_DONT_GET_OPT_INFO(node);
03284 *rnode = node;
03285 return 0;
03286 }
03287
03288 static int
03289 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03290 UChar *p, int slen, UChar *end,
03291 regex_t* reg, Node **rnode)
03292 {
03293 int r, i, j, len, varlen;
03294 Node *anode, *var_anode, *snode, *xnode, *an;
03295 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03296
03297 *rnode = var_anode = NULL_NODE;
03298
03299 varlen = 0;
03300 for (i = 0; i < item_num; i++) {
03301 if (items[i].byte_len != slen) {
03302 varlen = 1;
03303 break;
03304 }
03305 }
03306
03307 if (varlen != 0) {
03308 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03309 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03310
03311 xnode = onig_node_new_list(NULL, NULL);
03312 if (IS_NULL(xnode)) goto mem_err;
03313 NCAR(var_anode) = xnode;
03314
03315 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03316 if (IS_NULL(anode)) goto mem_err;
03317 NCAR(xnode) = anode;
03318 }
03319 else {
03320 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03321 if (IS_NULL(anode)) return ONIGERR_MEMORY;
03322 }
03323
03324 snode = onig_node_new_str(p, p + slen);
03325 if (IS_NULL(snode)) goto mem_err;
03326
03327 NCAR(anode) = snode;
03328
03329 for (i = 0; i < item_num; i++) {
03330 snode = onig_node_new_str(NULL, NULL);
03331 if (IS_NULL(snode)) goto mem_err;
03332
03333 for (j = 0; j < items[i].code_len; j++) {
03334 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03335 if (len < 0) {
03336 r = len;
03337 goto mem_err2;
03338 }
03339
03340 r = onig_node_str_cat(snode, buf, buf + len);
03341 if (r != 0) goto mem_err2;
03342 }
03343
03344 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03345 if (IS_NULL(an)) {
03346 goto mem_err2;
03347 }
03348
03349 if (items[i].byte_len != slen) {
03350 Node *rem;
03351 UChar *q = p + items[i].byte_len;
03352
03353 if (q < end) {
03354 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03355 if (r != 0) {
03356 onig_node_free(an);
03357 goto mem_err2;
03358 }
03359
03360 xnode = onig_node_list_add(NULL_NODE, snode);
03361 if (IS_NULL(xnode)) {
03362 onig_node_free(an);
03363 onig_node_free(rem);
03364 goto mem_err2;
03365 }
03366 if (IS_NULL(onig_node_list_add(xnode, rem))) {
03367 onig_node_free(an);
03368 onig_node_free(xnode);
03369 onig_node_free(rem);
03370 goto mem_err;
03371 }
03372
03373 NCAR(an) = xnode;
03374 }
03375 else {
03376 NCAR(an) = snode;
03377 }
03378
03379 NCDR(var_anode) = an;
03380 var_anode = an;
03381 }
03382 else {
03383 NCAR(an) = snode;
03384 NCDR(anode) = an;
03385 anode = an;
03386 }
03387 }
03388
03389 return varlen;
03390
03391 mem_err2:
03392 onig_node_free(snode);
03393
03394 mem_err:
03395 onig_node_free(*rnode);
03396
03397 return ONIGERR_MEMORY;
03398 }
03399
03400 static int
03401 expand_case_fold_string(Node* node, regex_t* reg)
03402 {
03403 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
03404
03405 int r, n, len, alt_num;
03406 UChar *start, *end, *p;
03407 Node *top_root, *root, *snode, *prev_node;
03408 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03409 StrNode* sn = NSTR(node);
03410
03411 if (NSTRING_IS_AMBIG(node)) return 0;
03412
03413 start = sn->s;
03414 end = sn->end;
03415 if (start >= end) return 0;
03416
03417 r = 0;
03418 top_root = root = prev_node = snode = NULL_NODE;
03419 alt_num = 1;
03420 p = start;
03421 while (p < end) {
03422 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03423 p, end, items);
03424 if (n < 0) {
03425 r = n;
03426 goto err;
03427 }
03428
03429 len = enclen(reg->enc, p, end);
03430
03431 if (n == 0) {
03432 if (IS_NULL(snode)) {
03433 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03434 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03435 if (IS_NULL(root)) {
03436 onig_node_free(prev_node);
03437 goto mem_err;
03438 }
03439 }
03440
03441 prev_node = snode = onig_node_new_str(NULL, NULL);
03442 if (IS_NULL(snode)) goto mem_err;
03443 if (IS_NOT_NULL(root)) {
03444 if (IS_NULL(onig_node_list_add(root, snode))) {
03445 onig_node_free(snode);
03446 goto mem_err;
03447 }
03448 }
03449 }
03450
03451 r = onig_node_str_cat(snode, p, p + len);
03452 if (r != 0) goto err;
03453 }
03454 else {
03455 alt_num *= (n + 1);
03456 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03457
03458 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03459 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03460 if (IS_NULL(root)) {
03461 onig_node_free(prev_node);
03462 goto mem_err;
03463 }
03464 }
03465
03466 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03467 if (r < 0) goto mem_err;
03468 if (r == 1) {
03469 if (IS_NULL(root)) {
03470 top_root = prev_node;
03471 }
03472 else {
03473 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03474 onig_node_free(prev_node);
03475 goto mem_err;
03476 }
03477 }
03478
03479 root = NCAR(prev_node);
03480 }
03481 else {
03482 if (IS_NOT_NULL(root)) {
03483 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03484 onig_node_free(prev_node);
03485 goto mem_err;
03486 }
03487 }
03488 }
03489
03490 snode = NULL_NODE;
03491 }
03492
03493 p += len;
03494 }
03495
03496 if (p < end) {
03497 Node *srem;
03498
03499 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03500 if (r != 0) goto mem_err;
03501
03502 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03503 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03504 if (IS_NULL(root)) {
03505 onig_node_free(srem);
03506 onig_node_free(prev_node);
03507 goto mem_err;
03508 }
03509 }
03510
03511 if (IS_NULL(root)) {
03512 prev_node = srem;
03513 }
03514 else {
03515 if (IS_NULL(onig_node_list_add(root, srem))) {
03516 onig_node_free(srem);
03517 goto mem_err;
03518 }
03519 }
03520 }
03521
03522
03523 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03524 swap_node(node, top_root);
03525 onig_node_free(top_root);
03526 return 0;
03527
03528 mem_err:
03529 r = ONIGERR_MEMORY;
03530
03531 err:
03532 onig_node_free(top_root);
03533 return r;
03534 }
03535
03536
03537 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03538
03539 #define CEC_THRES_NUM_BIG_REPEAT 512
03540 #define CEC_INFINITE_NUM 0x7fffffff
03541
03542 #define CEC_IN_INFINITE_REPEAT (1<<0)
03543 #define CEC_IN_FINITE_REPEAT (1<<1)
03544 #define CEC_CONT_BIG_REPEAT (1<<2)
03545
03546 static int
03547 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03548 {
03549 int type;
03550 int r = state;
03551
03552 type = NTYPE(node);
03553 switch (type) {
03554 case NT_LIST:
03555 {
03556 Node* prev = NULL_NODE;
03557 do {
03558 r = setup_comb_exp_check(NCAR(node), r, env);
03559 prev = NCAR(node);
03560 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03561 }
03562 break;
03563
03564 case NT_ALT:
03565 {
03566 int ret;
03567 do {
03568 ret = setup_comb_exp_check(NCAR(node), state, env);
03569 r |= ret;
03570 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03571 }
03572 break;
03573
03574 case NT_QTFR:
03575 {
03576 int child_state = state;
03577 int add_state = 0;
03578 QtfrNode* qn = NQTFR(node);
03579 Node* target = qn->target;
03580 int var_num;
03581
03582 if (! IS_REPEAT_INFINITE(qn->upper)) {
03583 if (qn->upper > 1) {
03584
03585 child_state |= CEC_IN_FINITE_REPEAT;
03586
03587
03588 if (env->backrefed_mem == 0) {
03589 if (NTYPE(qn->target) == NT_ENCLOSE) {
03590 EncloseNode* en = NENCLOSE(qn->target);
03591 if (en->type == ENCLOSE_MEMORY) {
03592 if (NTYPE(en->target) == NT_QTFR) {
03593 QtfrNode* q = NQTFR(en->target);
03594 if (IS_REPEAT_INFINITE(q->upper)
03595 && q->greedy == qn->greedy) {
03596 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03597 if (qn->upper == 1)
03598 child_state = state;
03599 }
03600 }
03601 }
03602 }
03603 }
03604 }
03605 }
03606
03607 if (state & CEC_IN_FINITE_REPEAT) {
03608 qn->comb_exp_check_num = -1;
03609 }
03610 else {
03611 if (IS_REPEAT_INFINITE(qn->upper)) {
03612 var_num = CEC_INFINITE_NUM;
03613 child_state |= CEC_IN_INFINITE_REPEAT;
03614 }
03615 else {
03616 var_num = qn->upper - qn->lower;
03617 }
03618
03619 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03620 add_state |= CEC_CONT_BIG_REPEAT;
03621
03622 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03623 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03624 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03625 if (qn->comb_exp_check_num == 0) {
03626 env->num_comb_exp_check++;
03627 qn->comb_exp_check_num = env->num_comb_exp_check;
03628 if (env->curr_max_regnum > env->comb_exp_max_regnum)
03629 env->comb_exp_max_regnum = env->curr_max_regnum;
03630 }
03631 }
03632 }
03633
03634 r = setup_comb_exp_check(target, child_state, env);
03635 r |= add_state;
03636 }
03637 break;
03638
03639 case NT_ENCLOSE:
03640 {
03641 EncloseNode* en = NENCLOSE(node);
03642
03643 switch (en->type) {
03644 case ENCLOSE_MEMORY:
03645 {
03646 if (env->curr_max_regnum < en->regnum)
03647 env->curr_max_regnum = en->regnum;
03648
03649 r = setup_comb_exp_check(en->target, state, env);
03650 }
03651 break;
03652
03653 default:
03654 r = setup_comb_exp_check(en->target, state, env);
03655 break;
03656 }
03657 }
03658 break;
03659
03660 #ifdef USE_SUBEXP_CALL
03661 case NT_CALL:
03662 if (IS_CALL_RECURSION(NCALL(node)))
03663 env->has_recursion = 1;
03664 else
03665 r = setup_comb_exp_check(NCALL(node)->target, state, env);
03666 break;
03667 #endif
03668
03669 default:
03670 break;
03671 }
03672
03673 return r;
03674 }
03675 #endif
03676
03677 #define IN_ALT (1<<0)
03678 #define IN_NOT (1<<1)
03679 #define IN_REPEAT (1<<2)
03680 #define IN_VAR_REPEAT (1<<3)
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690 static int
03691 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03692 {
03693 int type;
03694 int r = 0;
03695
03696 restart:
03697 type = NTYPE(node);
03698 switch (type) {
03699 case NT_LIST:
03700 {
03701 Node* prev = NULL_NODE;
03702 do {
03703 r = setup_tree(NCAR(node), reg, state, env);
03704 if (IS_NOT_NULL(prev) && r == 0) {
03705 r = next_setup(prev, NCAR(node), reg);
03706 }
03707 prev = NCAR(node);
03708 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03709 }
03710 break;
03711
03712 case NT_ALT:
03713 do {
03714 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03715 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03716 break;
03717
03718 case NT_CCLASS:
03719 break;
03720
03721 case NT_STR:
03722 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03723 r = expand_case_fold_string(node, reg);
03724 }
03725 break;
03726
03727 case NT_CTYPE:
03728 case NT_CANY:
03729 break;
03730
03731 #ifdef USE_SUBEXP_CALL
03732 case NT_CALL:
03733 break;
03734 #endif
03735
03736 case NT_BREF:
03737 {
03738 int i;
03739 int* p;
03740 Node** nodes = SCANENV_MEM_NODES(env);
03741 BRefNode* br = NBREF(node);
03742 p = BACKREFS_P(br);
03743 for (i = 0; i < br->back_num; i++) {
03744 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
03745 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03746 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03747 #ifdef USE_BACKREF_WITH_LEVEL
03748 if (IS_BACKREF_NEST_LEVEL(br)) {
03749 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03750 }
03751 #endif
03752 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03753 }
03754 }
03755 break;
03756
03757 case NT_QTFR:
03758 {
03759 OnigDistance d;
03760 QtfrNode* qn = NQTFR(node);
03761 Node* target = qn->target;
03762
03763 if ((state & IN_REPEAT) != 0) {
03764 qn->state |= NST_IN_REPEAT;
03765 }
03766
03767 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03768 r = get_min_match_length(target, &d, env);
03769 if (r) break;
03770 if (d == 0) {
03771 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03772 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03773 r = quantifiers_memory_node_info(target);
03774 if (r < 0) break;
03775 if (r > 0) {
03776 qn->target_empty_info = r;
03777 }
03778 #endif
03779 #if 0
03780 r = get_max_match_length(target, &d, env);
03781 if (r == 0 && d == 0) {
03782
03783 qn->upper = 1;
03784 if (qn->lower > 1) qn->lower = 1;
03785 if (NTYPE(target) == NT_STR) {
03786 qn->upper = qn->lower = 0;
03787 }
03788 }
03789 #endif
03790 }
03791 }
03792
03793 state |= IN_REPEAT;
03794 if (qn->lower != qn->upper)
03795 state |= IN_VAR_REPEAT;
03796 r = setup_tree(target, reg, state, env);
03797 if (r) break;
03798
03799
03800 #define EXPAND_STRING_MAX_LENGTH 100
03801 if (NTYPE(target) == NT_STR) {
03802 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03803 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03804 OnigDistance len = NSTRING_LEN(target);
03805 StrNode* sn = NSTR(target);
03806
03807 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03808 int i, n = qn->lower;
03809 onig_node_conv_to_str_node(node, NSTR(target)->flag);
03810 for (i = 0; i < n; i++) {
03811 r = onig_node_str_cat(node, sn->s, sn->end);
03812 if (r) break;
03813 }
03814 onig_node_free(target);
03815 break;
03816 }
03817 }
03818 }
03819
03820 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03821 if (qn->greedy && (qn->target_empty_info != 0)) {
03822 if (NTYPE(target) == NT_QTFR) {
03823 QtfrNode* tqn = NQTFR(target);
03824 if (IS_NOT_NULL(tqn->head_exact)) {
03825 qn->head_exact = tqn->head_exact;
03826 tqn->head_exact = NULL;
03827 }
03828 }
03829 else {
03830 qn->head_exact = get_head_value_node(qn->target, 1, reg);
03831 }
03832 }
03833 #endif
03834 }
03835 break;
03836
03837 case NT_ENCLOSE:
03838 {
03839 EncloseNode* en = NENCLOSE(node);
03840
03841 switch (en->type) {
03842 case ENCLOSE_OPTION:
03843 {
03844 OnigOptionType options = reg->options;
03845 reg->options = NENCLOSE(node)->option;
03846 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
03847 reg->options = options;
03848 }
03849 break;
03850
03851 case ENCLOSE_MEMORY:
03852 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03853 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03854
03855 }
03856 r = setup_tree(en->target, reg, state, env);
03857 break;
03858
03859 case ENCLOSE_STOP_BACKTRACK:
03860 {
03861 Node* target = en->target;
03862 r = setup_tree(target, reg, state, env);
03863 if (NTYPE(target) == NT_QTFR) {
03864 QtfrNode* tqn = NQTFR(target);
03865 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03866 tqn->greedy != 0) {
03867 int qtype = NTYPE(tqn->target);
03868 if (IS_NODE_TYPE_SIMPLE(qtype))
03869 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03870 }
03871 }
03872 }
03873 break;
03874 }
03875 }
03876 break;
03877
03878 case NT_ANCHOR:
03879 {
03880 AnchorNode* an = NANCHOR(node);
03881
03882 switch (an->type) {
03883 case ANCHOR_PREC_READ:
03884 r = setup_tree(an->target, reg, state, env);
03885 break;
03886 case ANCHOR_PREC_READ_NOT:
03887 r = setup_tree(an->target, reg, (state | IN_NOT), env);
03888 break;
03889
03890
03891 #define ALLOWED_TYPE_IN_LB \
03892 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
03893 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
03894
03895 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
03896 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
03897
03898 #define ALLOWED_ANCHOR_IN_LB \
03899 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03900 #define ALLOWED_ANCHOR_IN_LB_NOT \
03901 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03902
03903 case ANCHOR_LOOK_BEHIND:
03904 {
03905 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03906 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
03907 if (r < 0) return r;
03908 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03909 r = setup_look_behind(node, reg, env);
03910 if (r != 0) return r;
03911 if (NTYPE(node) != NT_ANCHOR) goto restart;
03912 r = setup_tree(an->target, reg, state, env);
03913 }
03914 break;
03915
03916 case ANCHOR_LOOK_BEHIND_NOT:
03917 {
03918 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03919 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03920 if (r < 0) return r;
03921 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03922 r = setup_look_behind(node, reg, env);
03923 if (r != 0) return r;
03924 if (NTYPE(node) != NT_ANCHOR) goto restart;
03925 r = setup_tree(an->target, reg, (state | IN_NOT), env);
03926 }
03927 break;
03928 }
03929 }
03930 break;
03931
03932 default:
03933 break;
03934 }
03935
03936 return r;
03937 }
03938
03939
03940 static int
03941 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03942 UChar skip[], int** int_skip)
03943 {
03944 OnigDistance i, len;
03945
03946 len = end - s;
03947 if (len < ONIG_CHAR_TABLE_SIZE) {
03948 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03949
03950 for (i = 0; i < len - 1; i++)
03951 skip[s[i]] = len - 1 - i;
03952 }
03953 else {
03954 if (IS_NULL(*int_skip)) {
03955 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03956 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03957 }
03958 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int)len;
03959
03960 for (i = 0; i < len - 1; i++)
03961 (*int_skip)[s[i]] = (int)(len - 1 - i);
03962 }
03963 return 0;
03964 }
03965
03966 #define OPT_EXACT_MAXLEN 24
03967
03968 typedef struct {
03969 OnigDistance min;
03970 OnigDistance max;
03971 } MinMaxLen;
03972
03973 typedef struct {
03974 MinMaxLen mmd;
03975 OnigEncoding enc;
03976 OnigOptionType options;
03977 OnigCaseFoldType case_fold_flag;
03978 ScanEnv* scan_env;
03979 } OptEnv;
03980
03981 typedef struct {
03982 int left_anchor;
03983 int right_anchor;
03984 } OptAncInfo;
03985
03986 typedef struct {
03987 MinMaxLen mmd;
03988 OptAncInfo anc;
03989
03990 int reach_end;
03991 int ignore_case;
03992 int len;
03993 UChar s[OPT_EXACT_MAXLEN];
03994 } OptExactInfo;
03995
03996 typedef struct {
03997 MinMaxLen mmd;
03998 OptAncInfo anc;
03999
04000 int value;
04001 UChar map[ONIG_CHAR_TABLE_SIZE];
04002 } OptMapInfo;
04003
04004 typedef struct {
04005 MinMaxLen len;
04006
04007 OptAncInfo anc;
04008 OptExactInfo exb;
04009 OptExactInfo exm;
04010 OptExactInfo expr;
04011
04012 OptMapInfo map;
04013 } NodeOptInfo;
04014
04015
04016 static int
04017 map_position_value(OnigEncoding enc, int i)
04018 {
04019 static const short int ByteValTable[] = {
04020 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
04021 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
04022 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
04023 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
04024 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04025 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
04026 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04027 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
04028 };
04029
04030 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04031 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04032 return 20;
04033 else
04034 return (int )ByteValTable[i];
04035 }
04036 else
04037 return 4;
04038 }
04039
04040 static int
04041 distance_value(MinMaxLen* mm)
04042 {
04043
04044 static const short int dist_vals[] = {
04045 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
04046 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
04047 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
04048 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
04049 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
04050 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
04051 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
04052 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
04053 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
04054 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
04055 };
04056
04057 OnigDistance d;
04058
04059 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04060
04061 d = mm->max - mm->min;
04062 if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04063
04064 return (int )dist_vals[d];
04065 else
04066 return 1;
04067 }
04068
04069 static int
04070 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04071 {
04072 if (v2 <= 0) return -1;
04073 if (v1 <= 0) return 1;
04074
04075 v1 *= distance_value(d1);
04076 v2 *= distance_value(d2);
04077
04078 if (v2 > v1) return 1;
04079 if (v2 < v1) return -1;
04080
04081 if (d2->min < d1->min) return 1;
04082 if (d2->min > d1->min) return -1;
04083 return 0;
04084 }
04085
04086 static int
04087 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04088 {
04089 return (a->min == b->min && a->max == b->max) ? 1 : 0;
04090 }
04091
04092
04093 static void
04094 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04095 {
04096 mml->min = min;
04097 mml->max = max;
04098 }
04099
04100 static void
04101 clear_mml(MinMaxLen* mml)
04102 {
04103 mml->min = mml->max = 0;
04104 }
04105
04106 static void
04107 copy_mml(MinMaxLen* to, MinMaxLen* from)
04108 {
04109 to->min = from->min;
04110 to->max = from->max;
04111 }
04112
04113 static void
04114 add_mml(MinMaxLen* to, MinMaxLen* from)
04115 {
04116 to->min = distance_add(to->min, from->min);
04117 to->max = distance_add(to->max, from->max);
04118 }
04119
04120 #if 0
04121 static void
04122 add_len_mml(MinMaxLen* to, OnigDistance len)
04123 {
04124 to->min = distance_add(to->min, len);
04125 to->max = distance_add(to->max, len);
04126 }
04127 #endif
04128
04129 static void
04130 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04131 {
04132 if (to->min > from->min) to->min = from->min;
04133 if (to->max < from->max) to->max = from->max;
04134 }
04135
04136 static void
04137 copy_opt_env(OptEnv* to, OptEnv* from)
04138 {
04139 *to = *from;
04140 }
04141
04142 static void
04143 clear_opt_anc_info(OptAncInfo* anc)
04144 {
04145 anc->left_anchor = 0;
04146 anc->right_anchor = 0;
04147 }
04148
04149 static void
04150 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04151 {
04152 *to = *from;
04153 }
04154
04155 static void
04156 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04157 OnigDistance left_len, OnigDistance right_len)
04158 {
04159 clear_opt_anc_info(to);
04160
04161 to->left_anchor = left->left_anchor;
04162 if (left_len == 0) {
04163 to->left_anchor |= right->left_anchor;
04164 }
04165
04166 to->right_anchor = right->right_anchor;
04167 if (right_len == 0) {
04168 to->right_anchor |= left->right_anchor;
04169 }
04170 }
04171
04172 static int
04173 is_left_anchor(int anc)
04174 {
04175 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04176 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04177 anc == ANCHOR_PREC_READ_NOT)
04178 return 0;
04179
04180 return 1;
04181 }
04182
04183 static int
04184 is_set_opt_anc_info(OptAncInfo* to, int anc)
04185 {
04186 if ((to->left_anchor & anc) != 0) return 1;
04187
04188 return ((to->right_anchor & anc) != 0 ? 1 : 0);
04189 }
04190
04191 static void
04192 add_opt_anc_info(OptAncInfo* to, int anc)
04193 {
04194 if (is_left_anchor(anc))
04195 to->left_anchor |= anc;
04196 else
04197 to->right_anchor |= anc;
04198 }
04199
04200 static void
04201 remove_opt_anc_info(OptAncInfo* to, int anc)
04202 {
04203 if (is_left_anchor(anc))
04204 to->left_anchor &= ~anc;
04205 else
04206 to->right_anchor &= ~anc;
04207 }
04208
04209 static void
04210 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04211 {
04212 to->left_anchor &= add->left_anchor;
04213 to->right_anchor &= add->right_anchor;
04214 }
04215
04216 static int
04217 is_full_opt_exact_info(OptExactInfo* ex)
04218 {
04219 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04220 }
04221
04222 static void
04223 clear_opt_exact_info(OptExactInfo* ex)
04224 {
04225 clear_mml(&ex->mmd);
04226 clear_opt_anc_info(&ex->anc);
04227 ex->reach_end = 0;
04228 ex->ignore_case = 0;
04229 ex->len = 0;
04230 ex->s[0] = '\0';
04231 }
04232
04233 static void
04234 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04235 {
04236 *to = *from;
04237 }
04238
04239 static void
04240 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04241 {
04242 int i, j, len;
04243 UChar *p, *end;
04244 OptAncInfo tanc;
04245
04246 if (! to->ignore_case && add->ignore_case) {
04247 if (to->len >= add->len) return ;
04248
04249 to->ignore_case = 1;
04250 }
04251
04252 p = add->s;
04253 end = p + add->len;
04254 for (i = to->len; p < end; ) {
04255 len = enclen(enc, p, end);
04256 if (i + len > OPT_EXACT_MAXLEN) break;
04257 for (j = 0; j < len && p < end; j++)
04258 to->s[i++] = *p++;
04259 }
04260
04261 to->len = i;
04262 to->reach_end = (p == end ? add->reach_end : 0);
04263
04264 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04265 if (! to->reach_end) tanc.right_anchor = 0;
04266 copy_opt_anc_info(&to->anc, &tanc);
04267 }
04268
04269 static void
04270 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04271 int raw ARG_UNUSED, OnigEncoding enc)
04272 {
04273 int i, j, len;
04274 UChar *p;
04275
04276 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04277 len = enclen(enc, p, end);
04278 if (i + len > OPT_EXACT_MAXLEN) break;
04279 for (j = 0; j < len && p < end; j++)
04280 to->s[i++] = *p++;
04281 }
04282
04283 to->len = i;
04284 }
04285
04286 static void
04287 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04288 {
04289 int i, j, len;
04290
04291 if (add->len == 0 || to->len == 0) {
04292 clear_opt_exact_info(to);
04293 return ;
04294 }
04295
04296 if (! is_equal_mml(&to->mmd, &add->mmd)) {
04297 clear_opt_exact_info(to);
04298 return ;
04299 }
04300
04301 for (i = 0; i < to->len && i < add->len; ) {
04302 if (to->s[i] != add->s[i]) break;
04303 len = enclen(env->enc, to->s + i, to->s + to->len);
04304
04305 for (j = 1; j < len; j++) {
04306 if (to->s[i+j] != add->s[i+j]) break;
04307 }
04308 if (j < len) break;
04309 i += len;
04310 }
04311
04312 if (! add->reach_end || i < add->len || i < to->len) {
04313 to->reach_end = 0;
04314 }
04315 to->len = i;
04316 to->ignore_case |= add->ignore_case;
04317
04318 alt_merge_opt_anc_info(&to->anc, &add->anc);
04319 if (! to->reach_end) to->anc.right_anchor = 0;
04320 }
04321
04322 static void
04323 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04324 {
04325 int v1, v2;
04326
04327 v1 = now->len;
04328 v2 = alt->len;
04329
04330 if (v2 == 0) {
04331 return ;
04332 }
04333 else if (v1 == 0) {
04334 copy_opt_exact_info(now, alt);
04335 return ;
04336 }
04337 else if (v1 <= 2 && v2 <= 2) {
04338
04339 v2 = map_position_value(enc, now->s[0]);
04340 v1 = map_position_value(enc, alt->s[0]);
04341
04342 if (now->len > 1) v1 += 5;
04343 if (alt->len > 1) v2 += 5;
04344 }
04345
04346 if (now->ignore_case == 0) v1 *= 2;
04347 if (alt->ignore_case == 0) v2 *= 2;
04348
04349 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04350 copy_opt_exact_info(now, alt);
04351 }
04352
04353 static void
04354 clear_opt_map_info(OptMapInfo* map)
04355 {
04356 static const OptMapInfo clean_info = {
04357 {0, 0}, {0, 0}, 0,
04358 {
04359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04365 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04366 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04367 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04368 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04369 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04370 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04371 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04372 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04373 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04374 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04375 }
04376 };
04377
04378 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04379 }
04380
04381 static void
04382 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04383 {
04384 *to = *from;
04385 }
04386
04387 static void
04388 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04389 {
04390 if (map->map[c] == 0) {
04391 map->map[c] = 1;
04392 map->value += map_position_value(enc, c);
04393 }
04394 }
04395
04396 static int
04397 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04398 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04399 {
04400 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04401 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04402 int i, n;
04403
04404 add_char_opt_map_info(map, p[0], enc);
04405
04406 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04407 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04408 if (n < 0) return n;
04409
04410 for (i = 0; i < n; i++) {
04411 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04412 add_char_opt_map_info(map, buf[0], enc);
04413 }
04414
04415 return 0;
04416 }
04417
04418 static void
04419 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04420 {
04421 const int z = 1<<15;
04422
04423 int v1, v2;
04424
04425 if (alt->value == 0) return ;
04426 if (now->value == 0) {
04427 copy_opt_map_info(now, alt);
04428 return ;
04429 }
04430
04431 v1 = z / now->value;
04432 v2 = z / alt->value;
04433 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04434 copy_opt_map_info(now, alt);
04435 }
04436
04437 static int
04438 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04439 {
04440 #define COMP_EM_BASE 20
04441 int ve, vm;
04442
04443 if (m->value <= 0) return -1;
04444
04445 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04446 vm = COMP_EM_BASE * 5 * 2 / m->value;
04447 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04448 }
04449
04450 static void
04451 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04452 {
04453 int i, val;
04454
04455
04456 if (to->value == 0) return ;
04457 if (add->value == 0 || to->mmd.max < add->mmd.min) {
04458 clear_opt_map_info(to);
04459 return ;
04460 }
04461
04462 alt_merge_mml(&to->mmd, &add->mmd);
04463
04464 val = 0;
04465 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04466 if (add->map[i])
04467 to->map[i] = 1;
04468
04469 if (to->map[i])
04470 val += map_position_value(enc, i);
04471 }
04472 to->value = val;
04473
04474 alt_merge_opt_anc_info(&to->anc, &add->anc);
04475 }
04476
04477 static void
04478 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04479 {
04480 copy_mml(&(opt->exb.mmd), mmd);
04481 copy_mml(&(opt->expr.mmd), mmd);
04482 copy_mml(&(opt->map.mmd), mmd);
04483 }
04484
04485 static void
04486 clear_node_opt_info(NodeOptInfo* opt)
04487 {
04488 clear_mml(&opt->len);
04489 clear_opt_anc_info(&opt->anc);
04490 clear_opt_exact_info(&opt->exb);
04491 clear_opt_exact_info(&opt->exm);
04492 clear_opt_exact_info(&opt->expr);
04493 clear_opt_map_info(&opt->map);
04494 }
04495
04496 static void
04497 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04498 {
04499 *to = *from;
04500 }
04501
04502 static void
04503 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04504 {
04505 int exb_reach, exm_reach;
04506 OptAncInfo tanc;
04507
04508 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04509 copy_opt_anc_info(&to->anc, &tanc);
04510
04511 if (add->exb.len > 0 && to->len.max == 0) {
04512 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04513 to->len.max, add->len.max);
04514 copy_opt_anc_info(&add->exb.anc, &tanc);
04515 }
04516
04517 if (add->map.value > 0 && to->len.max == 0) {
04518 if (add->map.mmd.max == 0)
04519 add->map.anc.left_anchor |= to->anc.left_anchor;
04520 }
04521
04522 exb_reach = to->exb.reach_end;
04523 exm_reach = to->exm.reach_end;
04524
04525 if (add->len.max != 0)
04526 to->exb.reach_end = to->exm.reach_end = 0;
04527
04528 if (add->exb.len > 0) {
04529 if (exb_reach) {
04530 concat_opt_exact_info(&to->exb, &add->exb, enc);
04531 clear_opt_exact_info(&add->exb);
04532 }
04533 else if (exm_reach) {
04534 concat_opt_exact_info(&to->exm, &add->exb, enc);
04535 clear_opt_exact_info(&add->exb);
04536 }
04537 }
04538 select_opt_exact_info(enc, &to->exm, &add->exb);
04539 select_opt_exact_info(enc, &to->exm, &add->exm);
04540
04541 if (to->expr.len > 0) {
04542 if (add->len.max > 0) {
04543 if (to->expr.len > (int )add->len.max)
04544 to->expr.len = (int)add->len.max;
04545
04546 if (to->expr.mmd.max == 0)
04547 select_opt_exact_info(enc, &to->exb, &to->expr);
04548 else
04549 select_opt_exact_info(enc, &to->exm, &to->expr);
04550 }
04551 }
04552 else if (add->expr.len > 0) {
04553 copy_opt_exact_info(&to->expr, &add->expr);
04554 }
04555
04556 select_opt_map_info(&to->map, &add->map);
04557
04558 add_mml(&to->len, &add->len);
04559 }
04560
04561 static void
04562 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04563 {
04564 alt_merge_opt_anc_info (&to->anc, &add->anc);
04565 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
04566 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
04567 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04568 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
04569
04570 alt_merge_mml(&to->len, &add->len);
04571 }
04572
04573
04574 #define MAX_NODE_OPT_INFO_REF_COUNT 5
04575
04576 static int
04577 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04578 {
04579 int type;
04580 int r = 0;
04581
04582 clear_node_opt_info(opt);
04583 set_bound_node_opt_info(opt, &env->mmd);
04584
04585 type = NTYPE(node);
04586 switch (type) {
04587 case NT_LIST:
04588 {
04589 OptEnv nenv;
04590 NodeOptInfo nopt;
04591 Node* nd = node;
04592
04593 copy_opt_env(&nenv, env);
04594 do {
04595 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04596 if (r == 0) {
04597 add_mml(&nenv.mmd, &nopt.len);
04598 concat_left_node_opt_info(env->enc, opt, &nopt);
04599 }
04600 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04601 }
04602 break;
04603
04604 case NT_ALT:
04605 {
04606 NodeOptInfo nopt;
04607 Node* nd = node;
04608
04609 do {
04610 r = optimize_node_left(NCAR(nd), &nopt, env);
04611 if (r == 0) {
04612 if (nd == node) copy_node_opt_info(opt, &nopt);
04613 else alt_merge_node_opt_info(opt, &nopt, env);
04614 }
04615 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04616 }
04617 break;
04618
04619 case NT_STR:
04620 {
04621 StrNode* sn = NSTR(node);
04622 OnigDistance slen = sn->end - sn->s;
04623 int is_raw = NSTRING_IS_RAW(node);
04624
04625 if (! NSTRING_IS_AMBIG(node)) {
04626 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04627 NSTRING_IS_RAW(node), env->enc);
04628 if (slen > 0) {
04629 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04630 }
04631 set_mml(&opt->len, slen, slen);
04632 }
04633 else {
04634 OnigDistance max;
04635
04636 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04637 int n = onigenc_strlen(env->enc, sn->s, sn->end);
04638 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04639 }
04640 else {
04641 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04642 is_raw, env->enc);
04643 opt->exb.ignore_case = 1;
04644
04645 if (slen > 0) {
04646 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04647 env->enc, env->case_fold_flag);
04648 if (r != 0) break;
04649 }
04650
04651 max = slen;
04652 }
04653
04654 set_mml(&opt->len, slen, max);
04655 }
04656
04657 if ((OnigDistance)opt->exb.len == slen)
04658 opt->exb.reach_end = 1;
04659 }
04660 break;
04661
04662 case NT_CCLASS:
04663 {
04664 int i, z;
04665 CClassNode* cc = NCCLASS(node);
04666
04667
04668
04669 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04670 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04671 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04672
04673 set_mml(&opt->len, min, max);
04674 }
04675 else {
04676 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04677 z = BITSET_AT(cc->bs, i);
04678 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
04679 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04680 }
04681 }
04682 set_mml(&opt->len, 1, 1);
04683 }
04684 }
04685 break;
04686
04687 case NT_CTYPE:
04688 {
04689 int i, min, max;
04690
04691 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04692
04693 if (max == 1) {
04694 min = 1;
04695
04696 switch (NCTYPE(node)->ctype) {
04697 case ONIGENC_CTYPE_WORD:
04698 if (NCTYPE(node)->not != 0) {
04699 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04700 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04701 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04702 }
04703 }
04704 }
04705 else {
04706 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04707 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04708 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04709 }
04710 }
04711 }
04712 break;
04713 }
04714 }
04715 else {
04716 min = ONIGENC_MBC_MINLEN(env->enc);
04717 }
04718 set_mml(&opt->len, min, max);
04719 }
04720 break;
04721
04722 case NT_CANY:
04723 {
04724 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04725 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04726 set_mml(&opt->len, min, max);
04727 }
04728 break;
04729
04730 case NT_ANCHOR:
04731 switch (NANCHOR(node)->type) {
04732 case ANCHOR_BEGIN_BUF:
04733 case ANCHOR_BEGIN_POSITION:
04734 case ANCHOR_BEGIN_LINE:
04735 case ANCHOR_END_BUF:
04736 case ANCHOR_SEMI_END_BUF:
04737 case ANCHOR_END_LINE:
04738 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
04739 break;
04740
04741 case ANCHOR_PREC_READ:
04742 {
04743 NodeOptInfo nopt;
04744
04745 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
04746 if (r == 0) {
04747 if (nopt.exb.len > 0)
04748 copy_opt_exact_info(&opt->expr, &nopt.exb);
04749 else if (nopt.exm.len > 0)
04750 copy_opt_exact_info(&opt->expr, &nopt.exm);
04751
04752 opt->expr.reach_end = 0;
04753
04754 if (nopt.map.value > 0)
04755 copy_opt_map_info(&opt->map, &nopt.map);
04756 }
04757 }
04758 break;
04759
04760 case ANCHOR_PREC_READ_NOT:
04761 case ANCHOR_LOOK_BEHIND:
04762 case ANCHOR_LOOK_BEHIND_NOT:
04763 break;
04764 }
04765 break;
04766
04767 case NT_BREF:
04768 {
04769 int i;
04770 int* backs;
04771 OnigDistance min, max, tmin, tmax;
04772 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04773 BRefNode* br = NBREF(node);
04774
04775 if (br->state & NST_RECURSION) {
04776 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04777 break;
04778 }
04779 backs = BACKREFS_P(br);
04780 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04781 if (r != 0) break;
04782 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04783 if (r != 0) break;
04784 for (i = 1; i < br->back_num; i++) {
04785 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04786 if (r != 0) break;
04787 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04788 if (r != 0) break;
04789 if (min > tmin) min = tmin;
04790 if (max < tmax) max = tmax;
04791 }
04792 if (r == 0) set_mml(&opt->len, min, max);
04793 }
04794 break;
04795
04796 #ifdef USE_SUBEXP_CALL
04797 case NT_CALL:
04798 if (IS_CALL_RECURSION(NCALL(node)))
04799 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04800 else {
04801 OnigOptionType save = env->options;
04802 env->options = NENCLOSE(NCALL(node)->target)->option;
04803 r = optimize_node_left(NCALL(node)->target, opt, env);
04804 env->options = save;
04805 }
04806 break;
04807 #endif
04808
04809 case NT_QTFR:
04810 {
04811 int i;
04812 OnigDistance min, max;
04813 NodeOptInfo nopt;
04814 QtfrNode* qn = NQTFR(node);
04815
04816 r = optimize_node_left(qn->target, &nopt, env);
04817 if (r) break;
04818
04819 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04820 if (env->mmd.max == 0 &&
04821 NTYPE(qn->target) == NT_CANY && qn->greedy) {
04822 if (IS_MULTILINE(env->options))
04823 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04824 else
04825 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04826 }
04827 }
04828 else {
04829 if (qn->lower > 0) {
04830 copy_node_opt_info(opt, &nopt);
04831 if (nopt.exb.len > 0) {
04832 if (nopt.exb.reach_end) {
04833 for (i = 2; i <= qn->lower &&
04834 ! is_full_opt_exact_info(&opt->exb); i++) {
04835 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04836 }
04837 if (i < qn->lower) {
04838 opt->exb.reach_end = 0;
04839 }
04840 }
04841 }
04842
04843 if (qn->lower != qn->upper) {
04844 opt->exb.reach_end = 0;
04845 opt->exm.reach_end = 0;
04846 }
04847 if (qn->lower > 1)
04848 opt->exm.reach_end = 0;
04849 }
04850 }
04851
04852 min = distance_multiply(nopt.len.min, qn->lower);
04853 if (IS_REPEAT_INFINITE(qn->upper))
04854 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04855 else
04856 max = distance_multiply(nopt.len.max, qn->upper);
04857
04858 set_mml(&opt->len, min, max);
04859 }
04860 break;
04861
04862 case NT_ENCLOSE:
04863 {
04864 EncloseNode* en = NENCLOSE(node);
04865
04866 switch (en->type) {
04867 case ENCLOSE_OPTION:
04868 {
04869 OnigOptionType save = env->options;
04870
04871 env->options = en->option;
04872 r = optimize_node_left(en->target, opt, env);
04873 env->options = save;
04874 }
04875 break;
04876
04877 case ENCLOSE_MEMORY:
04878 #ifdef USE_SUBEXP_CALL
04879 en->opt_count++;
04880 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04881 OnigDistance min, max;
04882
04883 min = 0;
04884 max = ONIG_INFINITE_DISTANCE;
04885 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
04886 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
04887 set_mml(&opt->len, min, max);
04888 }
04889 else
04890 #endif
04891 {
04892 r = optimize_node_left(en->target, opt, env);
04893
04894 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04895 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04896 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04897 }
04898 }
04899 break;
04900
04901 case ENCLOSE_STOP_BACKTRACK:
04902 r = optimize_node_left(en->target, opt, env);
04903 break;
04904 }
04905 }
04906 break;
04907
04908 default:
04909 #ifdef ONIG_DEBUG
04910 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04911 NTYPE(node));
04912 #endif
04913 r = ONIGERR_TYPE_BUG;
04914 break;
04915 }
04916
04917 return r;
04918 }
04919
04920 static int
04921 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04922 {
04923 int r;
04924
04925 if (e->len == 0) return 0;
04926
04927 if (e->ignore_case) {
04928 reg->exact = (UChar* )xmalloc(e->len);
04929 CHECK_NULL_RETURN_MEMERR(reg->exact);
04930 xmemcpy(reg->exact, e->s, e->len);
04931 reg->exact_end = reg->exact + e->len;
04932 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04933 }
04934 else {
04935 int allow_reverse;
04936
04937 reg->exact = str_dup(e->s, e->s + e->len);
04938 CHECK_NULL_RETURN_MEMERR(reg->exact);
04939 reg->exact_end = reg->exact + e->len;
04940
04941 allow_reverse =
04942 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04943
04944 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04945 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04946 reg->map, &(reg->int_map));
04947 if (r) return r;
04948
04949 reg->optimize = (allow_reverse != 0
04950 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04951 }
04952 else {
04953 reg->optimize = ONIG_OPTIMIZE_EXACT;
04954 }
04955 }
04956
04957 reg->dmin = e->mmd.min;
04958 reg->dmax = e->mmd.max;
04959
04960 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04961 reg->threshold_len = (int)(reg->dmin + (reg->exact_end - reg->exact));
04962 }
04963
04964 return 0;
04965 }
04966
04967 static void
04968 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04969 {
04970 int i;
04971
04972 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04973 reg->map[i] = m->map[i];
04974
04975 reg->optimize = ONIG_OPTIMIZE_MAP;
04976 reg->dmin = m->mmd.min;
04977 reg->dmax = m->mmd.max;
04978
04979 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04980 reg->threshold_len = (int)(reg->dmin + 1);
04981 }
04982 }
04983
04984 static void
04985 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04986 {
04987 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
04988 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04989 }
04990
04991 #ifdef ONIG_DEBUG
04992 static void print_optimize_info(FILE* f, regex_t* reg);
04993 #endif
04994
04995 static int
04996 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04997 {
04998
04999 int r;
05000 NodeOptInfo opt;
05001 OptEnv env;
05002
05003 env.enc = reg->enc;
05004 env.options = reg->options;
05005 env.case_fold_flag = reg->case_fold_flag;
05006 env.scan_env = scan_env;
05007 clear_mml(&env.mmd);
05008
05009 r = optimize_node_left(node, &opt, &env);
05010 if (r) return r;
05011
05012 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05013 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
05014
05015 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05016
05017 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05018 reg->anchor_dmin = opt.len.min;
05019 reg->anchor_dmax = opt.len.max;
05020 }
05021
05022 if (opt.exb.len > 0 || opt.exm.len > 0) {
05023 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05024 if (opt.map.value > 0 &&
05025 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05026 goto set_map;
05027 }
05028 else {
05029 r = set_optimize_exact_info(reg, &opt.exb);
05030 set_sub_anchor(reg, &opt.exb.anc);
05031 }
05032 }
05033 else if (opt.map.value > 0) {
05034 set_map:
05035 set_optimize_map_info(reg, &opt.map);
05036 set_sub_anchor(reg, &opt.map.anc);
05037 }
05038 else {
05039 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05040 if (opt.len.max == 0)
05041 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05042 }
05043
05044 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05045 if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05046 #endif
05047 return r;
05048 }
05049
05050 static void
05051 clear_optimize_info(regex_t* reg)
05052 {
05053 reg->optimize = ONIG_OPTIMIZE_NONE;
05054 reg->anchor = 0;
05055 reg->anchor_dmin = 0;
05056 reg->anchor_dmax = 0;
05057 reg->sub_anchor = 0;
05058 reg->exact_end = (UChar* )NULL;
05059 reg->threshold_len = 0;
05060 if (IS_NOT_NULL(reg->exact)) {
05061 xfree(reg->exact);
05062 reg->exact = (UChar* )NULL;
05063 }
05064 }
05065
05066 #ifdef ONIG_DEBUG
05067
05068 static void print_enc_string(FILE* fp, OnigEncoding enc,
05069 const UChar *s, const UChar *end)
05070 {
05071 fprintf(fp, "\nPATTERN: /");
05072
05073 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05074 const UChar *p;
05075 OnigCodePoint code;
05076
05077 p = s;
05078 while (p < end) {
05079 code = ONIGENC_MBC_TO_CODE(enc, p, end);
05080 if (code >= 0x80) {
05081 fprintf(fp, " 0x%04x ", (int )code);
05082 }
05083 else {
05084 fputc((int )code, fp);
05085 }
05086
05087 p += enclen(enc, p, end);
05088 }
05089 }
05090 else {
05091 while (s < end) {
05092 fputc((int )*s, fp);
05093 s++;
05094 }
05095 }
05096
05097 fprintf(fp, "/ (%s)\n", enc->name);
05098 }
05099
05100 static void
05101 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05102 {
05103 if (a == ONIG_INFINITE_DISTANCE)
05104 fputs("inf", f);
05105 else
05106 fprintf(f, "(%"PRIuSIZE")", a);
05107
05108 fputs("-", f);
05109
05110 if (b == ONIG_INFINITE_DISTANCE)
05111 fputs("inf", f);
05112 else
05113 fprintf(f, "(%"PRIuSIZE")", b);
05114 }
05115
05116 static void
05117 print_anchor(FILE* f, int anchor)
05118 {
05119 int q = 0;
05120
05121 fprintf(f, "[");
05122
05123 if (anchor & ANCHOR_BEGIN_BUF) {
05124 fprintf(f, "begin-buf");
05125 q = 1;
05126 }
05127 if (anchor & ANCHOR_BEGIN_LINE) {
05128 if (q) fprintf(f, ", ");
05129 q = 1;
05130 fprintf(f, "begin-line");
05131 }
05132 if (anchor & ANCHOR_BEGIN_POSITION) {
05133 if (q) fprintf(f, ", ");
05134 q = 1;
05135 fprintf(f, "begin-pos");
05136 }
05137 if (anchor & ANCHOR_END_BUF) {
05138 if (q) fprintf(f, ", ");
05139 q = 1;
05140 fprintf(f, "end-buf");
05141 }
05142 if (anchor & ANCHOR_SEMI_END_BUF) {
05143 if (q) fprintf(f, ", ");
05144 q = 1;
05145 fprintf(f, "semi-end-buf");
05146 }
05147 if (anchor & ANCHOR_END_LINE) {
05148 if (q) fprintf(f, ", ");
05149 q = 1;
05150 fprintf(f, "end-line");
05151 }
05152 if (anchor & ANCHOR_ANYCHAR_STAR) {
05153 if (q) fprintf(f, ", ");
05154 q = 1;
05155 fprintf(f, "anychar-star");
05156 }
05157 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05158 if (q) fprintf(f, ", ");
05159 fprintf(f, "anychar-star-pl");
05160 }
05161
05162 fprintf(f, "]");
05163 }
05164
05165 static void
05166 print_optimize_info(FILE* f, regex_t* reg)
05167 {
05168 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05169 "EXACT_IC", "MAP" };
05170
05171 fprintf(f, "optimize: %s\n", on[reg->optimize]);
05172 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
05173 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05174 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05175 fprintf(f, "\n");
05176
05177 if (reg->optimize) {
05178 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
05179 fprintf(f, "\n");
05180 }
05181 fprintf(f, "\n");
05182
05183 if (reg->exact) {
05184 UChar *p;
05185 fprintf(f, "exact: [");
05186 for (p = reg->exact; p < reg->exact_end; p++) {
05187 fputc(*p, f);
05188 }
05189 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05190 }
05191 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05192 int c, i, n = 0;
05193
05194 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05195 if (reg->map[i]) n++;
05196
05197 fprintf(f, "map: n=%d\n", n);
05198 if (n > 0) {
05199 c = 0;
05200 fputc('[', f);
05201 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05202 if (reg->map[i] != 0) {
05203 if (c > 0) fputs(", ", f);
05204 c++;
05205 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05206 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05207 fputc(i, f);
05208 else
05209 fprintf(f, "%d", i);
05210 }
05211 }
05212 fprintf(f, "]\n");
05213 }
05214 }
05215 }
05216 #endif
05217
05218
05219 extern void
05220 onig_free_body(regex_t* reg)
05221 {
05222 if (IS_NOT_NULL(reg)) {
05223 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
05224 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
05225 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
05226 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05227 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
05228 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
05229
05230 #ifdef USE_NAMED_GROUP
05231 onig_names_free(reg);
05232 #endif
05233 }
05234 }
05235
05236 extern void
05237 onig_free(regex_t* reg)
05238 {
05239 if (IS_NOT_NULL(reg)) {
05240 onig_free_body(reg);
05241 xfree(reg);
05242 }
05243 }
05244
05245 size_t
05246 onig_memsize(const regex_t *reg)
05247 {
05248 size_t size = sizeof(regex_t);
05249 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
05250 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
05251 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05252 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05253 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05254 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
05255
05256 return size;
05257 }
05258
05259 #define REGEX_TRANSFER(to,from) do {\
05260 (to)->state = ONIG_STATE_MODIFY;\
05261 onig_free_body(to);\
05262 xmemcpy(to, from, sizeof(regex_t));\
05263 xfree(from);\
05264 } while (0)
05265
05266 extern void
05267 onig_transfer(regex_t* to, regex_t* from)
05268 {
05269 THREAD_ATOMIC_START;
05270 REGEX_TRANSFER(to, from);
05271 THREAD_ATOMIC_END;
05272 }
05273
05274 #define REGEX_CHAIN_HEAD(reg) do {\
05275 while (IS_NOT_NULL((reg)->chain)) {\
05276 (reg) = (reg)->chain;\
05277 }\
05278 } while (0)
05279
05280 extern void
05281 onig_chain_link_add(regex_t* to, regex_t* add)
05282 {
05283 THREAD_ATOMIC_START;
05284 REGEX_CHAIN_HEAD(to);
05285 to->chain = add;
05286 THREAD_ATOMIC_END;
05287 }
05288
05289 extern void
05290 onig_chain_reduce(regex_t* reg)
05291 {
05292 regex_t *head, *prev;
05293
05294 prev = reg;
05295 head = prev->chain;
05296 if (IS_NOT_NULL(head)) {
05297 reg->state = ONIG_STATE_MODIFY;
05298 while (IS_NOT_NULL(head->chain)) {
05299 prev = head;
05300 head = head->chain;
05301 }
05302 prev->chain = (regex_t* )NULL;
05303 REGEX_TRANSFER(reg, head);
05304 }
05305 }
05306
05307 #ifdef ONIG_DEBUG
05308 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05309 #endif
05310 #ifdef ONIG_DEBUG_PARSE_TREE
05311 static void print_tree P_((FILE* f, Node* node));
05312 #endif
05313
05314 extern int
05315 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05316 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05317 {
05318 #define COMPILE_INIT_SIZE 20
05319
05320 int r;
05321 OnigDistance init_size;
05322 Node* root;
05323 ScanEnv scan_env = {0};
05324 #ifdef USE_SUBEXP_CALL
05325 UnsetAddrList uslist;
05326 #endif
05327
05328 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05329
05330 scan_env.sourcefile = sourcefile;
05331 scan_env.sourceline = sourceline;
05332 reg->state = ONIG_STATE_COMPILING;
05333
05334 #ifdef ONIG_DEBUG
05335 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05336 #endif
05337
05338 if (reg->alloc == 0) {
05339 init_size = (pattern_end - pattern) * 2;
05340 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05341 r = BBUF_INIT(reg, init_size);
05342 if (r != 0) goto end;
05343 }
05344 else
05345 reg->used = 0;
05346
05347 reg->num_mem = 0;
05348 reg->num_repeat = 0;
05349 reg->num_null_check = 0;
05350 reg->repeat_range_alloc = 0;
05351 reg->repeat_range = (OnigRepeatRange* )NULL;
05352 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05353 reg->num_comb_exp_check = 0;
05354 #endif
05355
05356 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05357 if (r != 0) goto err;
05358
05359 #ifdef ONIG_DEBUG_PARSE_TREE
05360 # if 0
05361 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05362 if (!onig_is_prelude()) {
05363 print_tree(stderr, root);
05364 }
05365 # endif
05366 #endif
05367
05368 #ifdef USE_NAMED_GROUP
05369
05370 if (scan_env.num_named > 0 &&
05371 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05372 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05373 if (scan_env.num_named != scan_env.num_mem)
05374 r = disable_noname_group_capture(&root, reg, &scan_env);
05375 else
05376 r = numbered_ref_check(root);
05377
05378 if (r != 0) goto err;
05379 }
05380 #endif
05381
05382 #ifdef USE_SUBEXP_CALL
05383 if (scan_env.num_call > 0) {
05384 r = unset_addr_list_init(&uslist, scan_env.num_call);
05385 if (r != 0) goto err;
05386 scan_env.unset_addr_list = &uslist;
05387 r = setup_subexp_call(root, &scan_env);
05388 if (r != 0) goto err_unset;
05389 r = subexp_recursive_check_trav(root, &scan_env);
05390 if (r < 0) goto err_unset;
05391 r = subexp_inf_recursive_check_trav(root, &scan_env);
05392 if (r != 0) goto err_unset;
05393
05394 reg->num_call = scan_env.num_call;
05395 }
05396 else
05397 reg->num_call = 0;
05398 #endif
05399
05400 r = setup_tree(root, reg, 0, &scan_env);
05401 if (r != 0) goto err_unset;
05402
05403 #ifdef ONIG_DEBUG_PARSE_TREE
05404 if (!onig_is_prelude()) print_tree(stderr, root);
05405 #endif
05406
05407 reg->capture_history = scan_env.capture_history;
05408 reg->bt_mem_start = scan_env.bt_mem_start;
05409 reg->bt_mem_start |= reg->capture_history;
05410 if (IS_FIND_CONDITION(reg->options))
05411 BIT_STATUS_ON_ALL(reg->bt_mem_end);
05412 else {
05413 reg->bt_mem_end = scan_env.bt_mem_end;
05414 reg->bt_mem_end |= reg->capture_history;
05415 }
05416
05417 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05418 if (scan_env.backrefed_mem == 0
05419 #ifdef USE_SUBEXP_CALL
05420 || scan_env.num_call == 0
05421 #endif
05422 ) {
05423 setup_comb_exp_check(root, 0, &scan_env);
05424 #ifdef USE_SUBEXP_CALL
05425 if (scan_env.has_recursion != 0) {
05426 scan_env.num_comb_exp_check = 0;
05427 }
05428 else
05429 #endif
05430 if (scan_env.comb_exp_max_regnum > 0) {
05431 int i;
05432 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05433 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05434 scan_env.num_comb_exp_check = 0;
05435 break;
05436 }
05437 }
05438 }
05439 }
05440
05441 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05442 #endif
05443
05444 clear_optimize_info(reg);
05445 #ifndef ONIG_DONT_OPTIMIZE
05446 r = set_optimize_info_from_tree(root, reg, &scan_env);
05447 if (r != 0) goto err_unset;
05448 #endif
05449
05450 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05451 xfree(scan_env.mem_nodes_dynamic);
05452 scan_env.mem_nodes_dynamic = (Node** )NULL;
05453 }
05454
05455 r = compile_tree(root, reg);
05456 if (r == 0) {
05457 r = add_opcode(reg, OP_END);
05458 #ifdef USE_SUBEXP_CALL
05459 if (scan_env.num_call > 0) {
05460 r = unset_addr_list_fix(&uslist, reg);
05461 unset_addr_list_end(&uslist);
05462 if (r) goto err;
05463 }
05464 #endif
05465
05466 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05467 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05468 else {
05469 if (reg->bt_mem_start != 0)
05470 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05471 else
05472 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05473 }
05474 }
05475 #ifdef USE_SUBEXP_CALL
05476 else if (scan_env.num_call > 0) {
05477 unset_addr_list_end(&uslist);
05478 }
05479 #endif
05480 onig_node_free(root);
05481
05482 #ifdef ONIG_DEBUG_COMPILE
05483 #ifdef USE_NAMED_GROUP
05484 if (!onig_is_prelude()) onig_print_names(stderr, reg);
05485 #endif
05486 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05487 #endif
05488
05489 end:
05490 reg->state = ONIG_STATE_NORMAL;
05491 return r;
05492
05493 err_unset:
05494 #ifdef USE_SUBEXP_CALL
05495 if (scan_env.num_call > 0) {
05496 unset_addr_list_end(&uslist);
05497 }
05498 #endif
05499 err:
05500 if (IS_NOT_NULL(scan_env.error)) {
05501 if (IS_NOT_NULL(einfo)) {
05502 einfo->enc = scan_env.enc;
05503 einfo->par = scan_env.error;
05504 einfo->par_end = scan_env.error_end;
05505 }
05506 }
05507
05508 onig_node_free(root);
05509 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05510 xfree(scan_env.mem_nodes_dynamic);
05511 return r;
05512 }
05513
05514 #ifdef USE_RECOMPILE_API
05515 extern int
05516 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05517 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05518 OnigErrorInfo* einfo)
05519 {
05520 int r;
05521 regex_t *new_reg;
05522
05523 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05524 if (r) return r;
05525 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05526 onig_transfer(reg, new_reg);
05527 }
05528 else {
05529 onig_chain_link_add(reg, new_reg);
05530 }
05531 return 0;
05532 }
05533 #endif
05534
05535 static int onig_inited = 0;
05536
05537 extern int
05538 onig_reg_init(regex_t* reg, OnigOptionType option,
05539 OnigCaseFoldType case_fold_flag,
05540 OnigEncoding enc, const OnigSyntaxType* syntax)
05541 {
05542 if (! onig_inited)
05543 onig_init();
05544
05545 if (IS_NULL(reg))
05546 return ONIGERR_INVALID_ARGUMENT;
05547
05548 if (ONIGENC_IS_UNDEF(enc))
05549 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05550
05551 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05552 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05553 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05554 }
05555
05556 (reg)->state = ONIG_STATE_MODIFY;
05557
05558 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05559 option |= syntax->options;
05560 option &= ~ONIG_OPTION_SINGLELINE;
05561 }
05562 else
05563 option |= syntax->options;
05564
05565 (reg)->enc = enc;
05566 (reg)->options = option;
05567 (reg)->syntax = syntax;
05568 (reg)->optimize = 0;
05569 (reg)->exact = (UChar* )NULL;
05570 (reg)->int_map = (int* )NULL;
05571 (reg)->int_map_backward = (int* )NULL;
05572 (reg)->chain = (regex_t* )NULL;
05573
05574 (reg)->p = (UChar* )NULL;
05575 (reg)->alloc = 0;
05576 (reg)->used = 0;
05577 (reg)->name_table = (void* )NULL;
05578
05579 (reg)->case_fold_flag = case_fold_flag;
05580 return 0;
05581 }
05582
05583 extern int
05584 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05585 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05586 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05587 {
05588 int r;
05589
05590 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05591 if (r) return r;
05592
05593 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05594 return r;
05595 }
05596
05597 extern int
05598 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05599 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05600 OnigErrorInfo* einfo)
05601 {
05602 int r;
05603
05604 *reg = (regex_t* )xmalloc(sizeof(regex_t));
05605 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05606
05607 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05608 if (r) goto err;
05609
05610 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05611 if (r) {
05612 err:
05613 onig_free(*reg);
05614 *reg = NULL;
05615 }
05616 return r;
05617 }
05618
05619
05620 extern int
05621 onig_init(void)
05622 {
05623 if (onig_inited != 0)
05624 return 0;
05625
05626 THREAD_SYSTEM_INIT;
05627 THREAD_ATOMIC_START;
05628
05629 onig_inited = 1;
05630
05631 onigenc_init();
05632
05633
05634 #ifdef ONIG_DEBUG_STATISTICS
05635 onig_statistics_init();
05636 #endif
05637
05638 THREAD_ATOMIC_END;
05639 return 0;
05640 }
05641
05642
05643 extern int
05644 onig_end(void)
05645 {
05646 THREAD_ATOMIC_START;
05647
05648 #ifdef ONIG_DEBUG_STATISTICS
05649 if (!onig_is_prelude()) onig_print_statistics(stderr);
05650 #endif
05651
05652 #ifdef USE_SHARED_CCLASS_TABLE
05653 onig_free_shared_cclass_table();
05654 #endif
05655
05656 #ifdef USE_PARSE_TREE_NODE_RECYCLE
05657 onig_free_node_list();
05658 #endif
05659
05660 onig_inited = 0;
05661
05662 THREAD_ATOMIC_END;
05663 THREAD_SYSTEM_END;
05664 return 0;
05665 }
05666
05667 extern int
05668 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
05669 {
05670 OnigCodePoint n, *data;
05671 OnigCodePoint low, high, x;
05672
05673 GET_CODE_POINT(n, p);
05674 data = (OnigCodePoint* )p;
05675 data++;
05676
05677 for (low = 0, high = n; low < high; ) {
05678 x = (low + high) >> 1;
05679 if (code > data[x * 2 + 1])
05680 low = x + 1;
05681 else
05682 high = x;
05683 }
05684
05685 return ((low < n && code >= data[low * 2]) ? 1 : 0);
05686 }
05687
05688 extern int
05689 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
05690 {
05691 int found;
05692
05693 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
05694 if (IS_NULL(cc->mbuf)) {
05695 found = 0;
05696 }
05697 else {
05698 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
05699 }
05700 }
05701 else {
05702 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
05703 }
05704
05705 if (IS_NCCLASS_NOT(cc))
05706 return !found;
05707 else
05708 return found;
05709 }
05710
05711 extern int
05712 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
05713 {
05714 int len;
05715
05716 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05717 len = 2;
05718 }
05719 else {
05720 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
05721 }
05722 return onig_is_code_in_cc_len(len, code, cc);
05723 }
05724
05725
05726 #ifdef ONIG_DEBUG
05727
05728
05729 #define ARG_SPECIAL -1
05730 #define ARG_NON 0
05731 #define ARG_RELADDR 1
05732 #define ARG_ABSADDR 2
05733 #define ARG_LENGTH 3
05734 #define ARG_MEMNUM 4
05735 #define ARG_OPTION 5
05736 #define ARG_STATE_CHECK 6
05737
05738 OnigOpInfoType OnigOpInfo[] = {
05739 { OP_FINISH, "finish", ARG_NON },
05740 { OP_END, "end", ARG_NON },
05741 { OP_EXACT1, "exact1", ARG_SPECIAL },
05742 { OP_EXACT2, "exact2", ARG_SPECIAL },
05743 { OP_EXACT3, "exact3", ARG_SPECIAL },
05744 { OP_EXACT4, "exact4", ARG_SPECIAL },
05745 { OP_EXACT5, "exact5", ARG_SPECIAL },
05746 { OP_EXACTN, "exactn", ARG_SPECIAL },
05747 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
05748 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
05749 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
05750 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
05751 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
05752 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
05753 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
05754 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
05755 { OP_CCLASS, "cclass", ARG_SPECIAL },
05756 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
05757 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
05758 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
05759 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
05760 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
05761 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
05762 { OP_ANYCHAR, "anychar", ARG_NON },
05763 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
05764 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
05765 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
05766 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05767 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05768 { OP_WORD, "word", ARG_NON },
05769 { OP_NOT_WORD, "not-word", ARG_NON },
05770 { OP_WORD_BOUND, "word-bound", ARG_NON },
05771 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
05772 { OP_WORD_BEGIN, "word-begin", ARG_NON },
05773 { OP_WORD_END, "word-end", ARG_NON },
05774 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
05775 { OP_END_BUF, "end-buf", ARG_NON },
05776 { OP_BEGIN_LINE, "begin-line", ARG_NON },
05777 { OP_END_LINE, "end-line", ARG_NON },
05778 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
05779 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
05780 { OP_BACKREF1, "backref1", ARG_NON },
05781 { OP_BACKREF2, "backref2", ARG_NON },
05782 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
05783 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
05784 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
05785 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
05786 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
05787 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
05788 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
05789 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
05790 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
05791 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
05792 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
05793 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
05794 { OP_SET_OPTION, "set-option", ARG_OPTION },
05795 { OP_FAIL, "fail", ARG_NON },
05796 { OP_JUMP, "jump", ARG_RELADDR },
05797 { OP_PUSH, "push", ARG_RELADDR },
05798 { OP_POP, "pop", ARG_NON },
05799 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
05800 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
05801 { OP_REPEAT, "repeat", ARG_SPECIAL },
05802 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
05803 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
05804 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
05805 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
05806 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
05807 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
05808 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
05809 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
05810 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
05811 { OP_PUSH_POS, "push-pos", ARG_NON },
05812 { OP_POP_POS, "pop-pos", ARG_NON },
05813 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
05814 { OP_FAIL_POS, "fail-pos", ARG_NON },
05815 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
05816 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
05817 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
05818 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05819 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05820 { OP_CALL, "call", ARG_ABSADDR },
05821 { OP_RETURN, "return", ARG_NON },
05822 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
05823 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05824 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
05825 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
05826 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05827 "state-check-anychar-ml*", ARG_STATE_CHECK },
05828 { -1, "", ARG_NON }
05829 };
05830
05831 static const char*
05832 op2name(int opcode)
05833 {
05834 int i;
05835
05836 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05837 if (opcode == OnigOpInfo[i].opcode)
05838 return OnigOpInfo[i].name;
05839 }
05840 return "";
05841 }
05842
05843 static int
05844 op2arg_type(int opcode)
05845 {
05846 int i;
05847
05848 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05849 if (opcode == OnigOpInfo[i].opcode)
05850 return OnigOpInfo[i].arg_type;
05851 }
05852 return ARG_SPECIAL;
05853 }
05854
05855 static void
05856 Indent(FILE* f, int indent)
05857 {
05858 int i;
05859 for (i = 0; i < indent; i++) putc(' ', f);
05860 }
05861
05862 static void
05863 p_string(FILE* f, int len, UChar* s)
05864 {
05865 fputs(":", f);
05866 while (len-- > 0) { fputc(*s++, f); }
05867 }
05868
05869 static void
05870 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05871 {
05872 int x = len * mb_len;
05873
05874 fprintf(f, ":%d:", len);
05875 while (x-- > 0) { fputc(*s++, f); }
05876 }
05877
05878 extern void
05879 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
05880 OnigEncoding enc)
05881 {
05882 int i, n, arg_type;
05883 RelAddrType addr;
05884 LengthType len;
05885 MemNumType mem;
05886 StateCheckNumType scn;
05887 OnigCodePoint code;
05888 UChar *q;
05889
05890 fprintf(f, "[%s", op2name(*bp));
05891 arg_type = op2arg_type(*bp);
05892 if (arg_type != ARG_SPECIAL) {
05893 bp++;
05894 switch (arg_type) {
05895 case ARG_NON:
05896 break;
05897 case ARG_RELADDR:
05898 GET_RELADDR_INC(addr, bp);
05899 fprintf(f, ":(%d)", addr);
05900 break;
05901 case ARG_ABSADDR:
05902 GET_ABSADDR_INC(addr, bp);
05903 fprintf(f, ":(%d)", addr);
05904 break;
05905 case ARG_LENGTH:
05906 GET_LENGTH_INC(len, bp);
05907 fprintf(f, ":%d", len);
05908 break;
05909 case ARG_MEMNUM:
05910 mem = *((MemNumType* )bp);
05911 bp += SIZE_MEMNUM;
05912 fprintf(f, ":%d", mem);
05913 break;
05914 case ARG_OPTION:
05915 {
05916 OnigOptionType option = *((OnigOptionType* )bp);
05917 bp += SIZE_OPTION;
05918 fprintf(f, ":%d", option);
05919 }
05920 break;
05921
05922 case ARG_STATE_CHECK:
05923 scn = *((StateCheckNumType* )bp);
05924 bp += SIZE_STATE_CHECK_NUM;
05925 fprintf(f, ":%d", scn);
05926 break;
05927 }
05928 }
05929 else {
05930 switch (*bp++) {
05931 case OP_EXACT1:
05932 case OP_ANYCHAR_STAR_PEEK_NEXT:
05933 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05934 p_string(f, 1, bp++); break;
05935 case OP_EXACT2:
05936 p_string(f, 2, bp); bp += 2; break;
05937 case OP_EXACT3:
05938 p_string(f, 3, bp); bp += 3; break;
05939 case OP_EXACT4:
05940 p_string(f, 4, bp); bp += 4; break;
05941 case OP_EXACT5:
05942 p_string(f, 5, bp); bp += 5; break;
05943 case OP_EXACTN:
05944 GET_LENGTH_INC(len, bp);
05945 p_len_string(f, len, 1, bp);
05946 bp += len;
05947 break;
05948
05949 case OP_EXACTMB2N1:
05950 p_string(f, 2, bp); bp += 2; break;
05951 case OP_EXACTMB2N2:
05952 p_string(f, 4, bp); bp += 4; break;
05953 case OP_EXACTMB2N3:
05954 p_string(f, 6, bp); bp += 6; break;
05955 case OP_EXACTMB2N:
05956 GET_LENGTH_INC(len, bp);
05957 p_len_string(f, len, 2, bp);
05958 bp += len * 2;
05959 break;
05960 case OP_EXACTMB3N:
05961 GET_LENGTH_INC(len, bp);
05962 p_len_string(f, len, 3, bp);
05963 bp += len * 3;
05964 break;
05965 case OP_EXACTMBN:
05966 {
05967 int mb_len;
05968
05969 GET_LENGTH_INC(mb_len, bp);
05970 GET_LENGTH_INC(len, bp);
05971 fprintf(f, ":%d:%d:", mb_len, len);
05972 n = len * mb_len;
05973 while (n-- > 0) { fputc(*bp++, f); }
05974 }
05975 break;
05976
05977 case OP_EXACT1_IC:
05978 len = enclen(enc, bp, bpend);
05979 p_string(f, len, bp);
05980 bp += len;
05981 break;
05982 case OP_EXACTN_IC:
05983 GET_LENGTH_INC(len, bp);
05984 p_len_string(f, len, 1, bp);
05985 bp += len;
05986 break;
05987
05988 case OP_CCLASS:
05989 n = bitset_on_num((BitSetRef )bp);
05990 bp += SIZE_BITSET;
05991 fprintf(f, ":%d", n);
05992 break;
05993
05994 case OP_CCLASS_NOT:
05995 n = bitset_on_num((BitSetRef )bp);
05996 bp += SIZE_BITSET;
05997 fprintf(f, ":%d", n);
05998 break;
05999
06000 case OP_CCLASS_MB:
06001 case OP_CCLASS_MB_NOT:
06002 GET_LENGTH_INC(len, bp);
06003 q = bp;
06004 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06005 ALIGNMENT_RIGHT(q);
06006 #endif
06007 GET_CODE_POINT(code, q);
06008 bp += len;
06009 fprintf(f, ":%d:%d", (int )code, len);
06010 break;
06011
06012 case OP_CCLASS_MIX:
06013 case OP_CCLASS_MIX_NOT:
06014 n = bitset_on_num((BitSetRef )bp);
06015 bp += SIZE_BITSET;
06016 GET_LENGTH_INC(len, bp);
06017 q = bp;
06018 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06019 ALIGNMENT_RIGHT(q);
06020 #endif
06021 GET_CODE_POINT(code, q);
06022 bp += len;
06023 fprintf(f, ":%d:%d:%d", n, (int )code, len);
06024 break;
06025
06026 case OP_CCLASS_NODE:
06027 {
06028 CClassNode *cc;
06029
06030 GET_POINTER_INC(cc, bp);
06031 n = bitset_on_num(cc->bs);
06032 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06033 }
06034 break;
06035
06036 case OP_BACKREFN_IC:
06037 mem = *((MemNumType* )bp);
06038 bp += SIZE_MEMNUM;
06039 fprintf(f, ":%d", mem);
06040 break;
06041
06042 case OP_BACKREF_MULTI_IC:
06043 case OP_BACKREF_MULTI:
06044 fputs(" ", f);
06045 GET_LENGTH_INC(len, bp);
06046 for (i = 0; i < len; i++) {
06047 GET_MEMNUM_INC(mem, bp);
06048 if (i > 0) fputs(", ", f);
06049 fprintf(f, "%d", mem);
06050 }
06051 break;
06052
06053 case OP_BACKREF_WITH_LEVEL:
06054 {
06055 OnigOptionType option;
06056 LengthType level;
06057
06058 GET_OPTION_INC(option, bp);
06059 fprintf(f, ":%d", option);
06060 GET_LENGTH_INC(level, bp);
06061 fprintf(f, ":%d", level);
06062
06063 fputs(" ", f);
06064 GET_LENGTH_INC(len, bp);
06065 for (i = 0; i < len; i++) {
06066 GET_MEMNUM_INC(mem, bp);
06067 if (i > 0) fputs(", ", f);
06068 fprintf(f, "%d", mem);
06069 }
06070 }
06071 break;
06072
06073 case OP_REPEAT:
06074 case OP_REPEAT_NG:
06075 {
06076 mem = *((MemNumType* )bp);
06077 bp += SIZE_MEMNUM;
06078 addr = *((RelAddrType* )bp);
06079 bp += SIZE_RELADDR;
06080 fprintf(f, ":%d:%d", mem, addr);
06081 }
06082 break;
06083
06084 case OP_PUSH_OR_JUMP_EXACT1:
06085 case OP_PUSH_IF_PEEK_NEXT:
06086 addr = *((RelAddrType* )bp);
06087 bp += SIZE_RELADDR;
06088 fprintf(f, ":(%d)", addr);
06089 p_string(f, 1, bp);
06090 bp += 1;
06091 break;
06092
06093 case OP_LOOK_BEHIND:
06094 GET_LENGTH_INC(len, bp);
06095 fprintf(f, ":%d", len);
06096 break;
06097
06098 case OP_PUSH_LOOK_BEHIND_NOT:
06099 GET_RELADDR_INC(addr, bp);
06100 GET_LENGTH_INC(len, bp);
06101 fprintf(f, ":%d:(%d)", len, addr);
06102 break;
06103
06104 case OP_STATE_CHECK_PUSH:
06105 case OP_STATE_CHECK_PUSH_OR_JUMP:
06106 scn = *((StateCheckNumType* )bp);
06107 bp += SIZE_STATE_CHECK_NUM;
06108 addr = *((RelAddrType* )bp);
06109 bp += SIZE_RELADDR;
06110 fprintf(f, ":%d:(%d)", scn, addr);
06111 break;
06112
06113 default:
06114 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06115 *--bp);
06116 }
06117 }
06118 fputs("]", f);
06119 if (nextp) *nextp = bp;
06120 }
06121
06122 static void
06123 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06124 {
06125 int ncode;
06126 UChar* bp = reg->p;
06127 UChar* end = reg->p + reg->used;
06128
06129 fprintf(f, "code length: %d\n", reg->used);
06130
06131 ncode = -1;
06132 while (bp < end) {
06133 ncode++;
06134 if (bp > reg->p) {
06135 if (ncode % 5 == 0)
06136 fprintf(f, "\n%ld:", bp-reg->p);
06137 else
06138 fprintf(f, " %ld:", bp-reg->p);
06139 }
06140 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06141 }
06142
06143 fprintf(f, "\n");
06144 }
06145
06146 static void
06147 print_indent_tree(FILE* f, Node* node, int indent)
06148 {
06149 int i, type, container_p = 0;
06150 int add = 3;
06151 UChar* p;
06152
06153 Indent(f, indent);
06154 if (IS_NULL(node)) {
06155 fprintf(f, "ERROR: null node!!!\n");
06156 exit (0);
06157 }
06158
06159 type = NTYPE(node);
06160 switch (type) {
06161 case NT_LIST:
06162 case NT_ALT:
06163 if (NTYPE(node) == NT_LIST)
06164 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06165 else
06166 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06167
06168 print_indent_tree(f, NCAR(node), indent + add);
06169 while (IS_NOT_NULL(node = NCDR(node))) {
06170 if (NTYPE(node) != type) {
06171 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06172 exit(0);
06173 }
06174 print_indent_tree(f, NCAR(node), indent + add);
06175 }
06176 break;
06177
06178 case NT_STR:
06179 fprintf(f, "<string%s:%"PRIxPTR">",
06180 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06181 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06182 if (*p >= 0x20 && *p < 0x7f)
06183 fputc(*p, f);
06184 else {
06185 fprintf(f, " 0x%02x", *p);
06186 }
06187 }
06188 break;
06189
06190 case NT_CCLASS:
06191 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06192 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06193 if (NCCLASS(node)->mbuf) {
06194 BBuf* bbuf = NCCLASS(node)->mbuf;
06195 for (i = 0; i < (int)bbuf->used; i++) {
06196 if (i > 0) fprintf(f, ",");
06197 fprintf(f, "%0x", bbuf->p[i]);
06198 }
06199 }
06200 break;
06201
06202 case NT_CTYPE:
06203 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06204 switch (NCTYPE(node)->ctype) {
06205 case ONIGENC_CTYPE_WORD:
06206 if (NCTYPE(node)->not != 0)
06207 fputs("not word", f);
06208 else
06209 fputs("word", f);
06210 break;
06211
06212 default:
06213 fprintf(f, "ERROR: undefined ctype.\n");
06214 exit(0);
06215 }
06216 break;
06217
06218 case NT_CANY:
06219 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06220 break;
06221
06222 case NT_ANCHOR:
06223 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06224 switch (NANCHOR(node)->type) {
06225 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
06226 case ANCHOR_END_BUF: fputs("end buf", f); break;
06227 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
06228 case ANCHOR_END_LINE: fputs("end line", f); break;
06229 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
06230 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06231
06232 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
06233 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
06234 #ifdef USE_WORD_BEGIN_END
06235 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
06236 case ANCHOR_WORD_END: fputs("word end", f); break;
06237 #endif
06238 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
06239 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
06240 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
06241 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06242
06243 default:
06244 fprintf(f, "ERROR: undefined anchor type.\n");
06245 break;
06246 }
06247 break;
06248
06249 case NT_BREF:
06250 {
06251 int* p;
06252 BRefNode* br = NBREF(node);
06253 p = BACKREFS_P(br);
06254 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06255 for (i = 0; i < br->back_num; i++) {
06256 if (i > 0) fputs(", ", f);
06257 fprintf(f, "%d", p[i]);
06258 }
06259 }
06260 break;
06261
06262 #ifdef USE_SUBEXP_CALL
06263 case NT_CALL:
06264 {
06265 CallNode* cn = NCALL(node);
06266 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06267 p_string(f, cn->name_end - cn->name, cn->name);
06268 }
06269 break;
06270 #endif
06271
06272 case NT_QTFR:
06273 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06274 NQTFR(node)->lower, NQTFR(node)->upper,
06275 (NQTFR(node)->greedy ? "" : "?"));
06276 print_indent_tree(f, NQTFR(node)->target, indent + add);
06277 break;
06278
06279 case NT_ENCLOSE:
06280 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06281 switch (NENCLOSE(node)->type) {
06282 case ENCLOSE_OPTION:
06283 fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06284 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06285 break;
06286 case ENCLOSE_MEMORY:
06287 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06288 break;
06289 case ENCLOSE_STOP_BACKTRACK:
06290 fprintf(f, "stop-bt");
06291 break;
06292
06293 default:
06294 break;
06295 }
06296 fprintf(f, "\n");
06297 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06298 break;
06299
06300 default:
06301 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06302 break;
06303 }
06304
06305 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06306 type != NT_ENCLOSE)
06307 fprintf(f, "\n");
06308
06309 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06310
06311 fflush(f);
06312 }
06313 #endif
06314
06315 #ifdef ONIG_DEBUG_PARSE_TREE
06316 static void
06317 print_tree(FILE* f, Node* node)
06318 {
06319 print_indent_tree(f, node, 0);
06320 }
06321 #endif
06322