1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 /* */ 3 /* This file is part of the program and library */ 4 /* SCIP --- Solving Constraint Integer Programs */ 5 /* */ 6 /* Copyright (c) 2002-2023 Zuse Institute Berlin (ZIB) */ 7 /* */ 8 /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 /* you may not use this file except in compliance with the License. */ 10 /* You may obtain a copy of the License at */ 11 /* */ 12 /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 /* */ 14 /* Unless required by applicable law or agreed to in writing, software */ 15 /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 /* See the License for the specific language governing permissions and */ 18 /* limitations under the License. */ 19 /* */ 20 /* You should have received a copy of the Apache-2.0 license */ 21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */ 22 /* */ 23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 24 25 /**@file xmldef.h 26 * @ingroup OTHER_CFILES 27 * @brief declarations for XML parsing 28 * @author Thorsten Koch 29 * @author Marc Pfetsch 30 * 31 * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a 32 * sequence of spaces only one will be used. 33 * 34 * @todo Implement possibility to avoid the construction of parsing information for certain tags 35 * (and their children). For solution files this would avoid parsing the constraints section. 36 */ 37 38 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/ 39 40 #include <blockmemshell/memory.h> 41 42 #include "xml.h" 43 #include "xmldef.h" 44 #include "scip/misc.h" 45 46 47 #include <sys/types.h> 48 #ifdef SCIP_WITH_ZLIB 49 #if defined(_WIN32) || defined(_WIN64) 50 #define R_OK _A_RDONLY 51 #define access _access 52 #include <io.h> 53 #else 54 #include <unistd.h> 55 #endif 56 #endif 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <assert.h> 60 #include <ctype.h> 61 #include <string.h> 62 63 64 #define NAME_EXT_SIZE 128 65 #define ATTR_EXT_SIZE 4096 66 #define DATA_EXT_SIZE 4096 67 #define LINE_BUF_SIZE 8192 68 69 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__) 70 71 72 /* forward declarations */ 73 typedef struct parse_stack_struct PSTACK; 74 typedef struct parse_pos_struct PPOS; 75 76 /** state of the parser */ 77 enum parse_state_enum 78 { 79 XML_STATE_ERROR, 80 XML_STATE_BEFORE, 81 XML_STATE_IN_TAG, 82 XML_STATE_PCDATA, 83 XML_STATE_EOF 84 }; 85 typedef enum parse_state_enum PSTATE; 86 87 /** Stack as a (singly) linked list. The top element is the current node. */ 88 struct parse_stack_struct 89 { 90 XML_NODE* node; 91 PSTACK* next; 92 }; 93 94 /** Store the current position in the file and the state of the parser. */ 95 struct parse_pos_struct 96 { 97 const char* filename; 98 FPTYPE fp; 99 char buf[LINE_BUF_SIZE]; 100 int pos; 101 int lineno; 102 int nextsym; 103 int lastsym; 104 PSTATE state; 105 PSTACK* top; 106 }; 107 108 109 /** output error message with corresponding line and position */ 110 static void xmlErrmsg( 111 PPOS* ppos, 112 const char* msg, 113 XML_Bool msg_only, 114 const char* file, 115 int line 116 ) 117 { 118 #ifndef NDEBUG 119 int ret; 120 assert( ppos != NULL ); 121 122 if ( ! msg_only ) 123 { 124 ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno); 125 assert(ret >= 0); 126 127 ret = fprintf(stderr, "%s", ppos->buf); 128 assert(ret >= 0); 129 130 if ( strchr(ppos->buf, '\n') == NULL ) 131 { 132 int retc; 133 134 retc = fputc('\n', stderr); 135 assert(retc != EOF); 136 } 137 138 ret = fprintf(stderr, "%*s\n", ppos->pos, "^"); 139 assert(ret >= 0); 140 } 141 ret = fprintf(stderr, "%s\n\n", msg); 142 assert(ret >= 0); 143 144 #else 145 146 if ( ! msg_only ) 147 { 148 (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno); 149 150 (void) fprintf(stderr, "%s", ppos->buf); 151 152 if ( strchr(ppos->buf, '\n') == NULL ) 153 { 154 (void) fputc('\n', stderr); 155 } 156 157 (void) fprintf(stderr, "%*s\n", ppos->pos, "^"); 158 } 159 (void) fprintf(stderr, "%s\n\n", msg); 160 #endif 161 } 162 163 164 /** Push new element on the parse stack. 165 * 166 * TRUE if it worked, FAILURE otherwise. 167 */ 168 static 169 XML_Bool pushPstack( 170 PPOS* ppos, 171 XML_NODE* node 172 ) 173 { 174 PSTACK* p; 175 176 assert(ppos != NULL); 177 assert(node != NULL); 178 179 debugMessage("Pushing %s\n", node->name); 180 181 ALLOC_FALSE( BMSallocMemory(&p) ); 182 assert(p != NULL); 183 184 p->node = node; 185 p->next = ppos->top; 186 ppos->top = p; 187 188 return TRUE; 189 } 190 191 /** returns top element on stack (which has to be present) */ 192 static XML_NODE* topPstack( 193 const PPOS* ppos 194 ) 195 { 196 assert(ppos != NULL); 197 assert(ppos->top != NULL); 198 199 return ppos->top->node; 200 } 201 202 /** remove top element from stack and deletes it 203 * 204 * TRUE if ok, FALSE otherwise 205 */ 206 static 207 XML_Bool popPstack( 208 PPOS* ppos /**< input stream position */ 209 ) 210 { 211 PSTACK* p; 212 XML_Bool result; 213 214 assert(ppos != NULL); 215 216 if ( ppos->top == NULL ) 217 { 218 xmlError(ppos, "Stack underflow"); 219 result = FALSE; 220 } 221 else 222 { 223 result = TRUE; 224 p = ppos->top; 225 ppos->top = p->next; 226 227 debugMessage("Poping %s\n", p->node->name); 228 BMSfreeMemory(&p); 229 } 230 return result; 231 } 232 233 /** remove complete stack */ 234 static 235 void clearPstack( 236 PPOS* ppos 237 ) 238 { 239 assert(ppos != NULL); 240 241 while ( ppos->top != NULL ) 242 (void) popPstack(ppos); 243 } 244 245 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */ 246 static 247 int mygetc( 248 PPOS* ppos 249 ) 250 { 251 assert(ppos != NULL); 252 assert(ppos->fp != NULL); 253 assert(ppos->pos < LINE_BUF_SIZE); 254 255 if ( ppos->buf[ppos->pos] == '\0' ) 256 { 257 #ifdef SCIP_DISABLED_CODE 258 /* the low level function gzread/fread used below seem to be faster */ 259 if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) ) 260 return EOF; 261 #else 262 size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/ 263 264 if( len == 0 || len > sizeof(ppos->buf) - 1 ) 265 return EOF; 266 267 ppos->buf[len] = '\0'; 268 #endif 269 ppos->pos = 0; 270 } 271 return (unsigned char)ppos->buf[ppos->pos++]; 272 } 273 274 275 #ifdef SPEC_LIKE_SPACE_HANDLING 276 /** Read input from fp_in. 277 * 278 * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of 279 * characters. 280 */ 281 static 282 int getsymbol( 283 PPOS* ppos 284 ) 285 { 286 int c; 287 288 assert(ppos != NULL); 289 290 if ( ppos->nextsym == 0 ) 291 c = mygetc(ppos); 292 else 293 { 294 c = ppos->nextsym; 295 ppos->nextsym = 0; 296 } 297 assert(ppos->nextsym == 0); 298 299 if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n'))) 300 c = mygetc(ppos); 301 302 ppos->lastsym = c; 303 304 if ( c == '\r' ) 305 c = '\n'; 306 307 if ( c == '\n' ) 308 ++ppos->lineno; 309 310 return c; 311 } 312 #else 313 /** Read input from fp_in (variant). 314 * 315 * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other. 316 * 317 * @note This function counts lines differently. On systems that have only one '\\r' as line feed 318 * (MAC) it does not count correctly. 319 */ 320 static 321 int getsymbol( 322 PPOS* ppos 323 ) 324 { 325 int c; 326 327 assert(ppos != NULL); 328 329 do 330 { 331 if ( ppos->nextsym == 0 ) 332 c = mygetc(ppos); 333 else 334 { 335 c = ppos->nextsym; 336 ppos->nextsym = 0; 337 } 338 assert(ppos->nextsym == 0); 339 340 if ( c == '\n' ) 341 ++ppos->lineno; 342 343 if ((c == '\n') || (c == '\r')) 344 c = ' '; 345 } while((c == ' ') && (ppos->lastsym == c)); 346 347 ppos->lastsym = c; 348 349 debugMessage("[%c]\n", c); 350 351 return c; 352 } 353 #endif 354 355 /** Reinserts a character into the input stream */ 356 static 357 void ungetsymbol( 358 PPOS* ppos, 359 int c 360 ) 361 { 362 assert(ppos != NULL); 363 assert(ppos->nextsym == 0); 364 365 ppos->nextsym = c; 366 } 367 368 /** Skip all spaces and return the next non-space character or EOF */ 369 static 370 int skipSpace( 371 PPOS* ppos 372 ) 373 { 374 int c; 375 376 assert(ppos != NULL); 377 378 do 379 { 380 c = getsymbol(ppos); 381 } 382 while(isspace(c)); 383 384 return c; 385 } 386 387 /** Get name of a TAG or attribute from the input stream. 388 * 389 * Either it returns a pointer to allocated memory which contains the name or it returns NULL if 390 * there is some error. 391 */ 392 static 393 char* getName( 394 PPOS* ppos 395 ) 396 { 397 char* name = NULL; 398 size_t size = 0; 399 size_t len = 0; 400 int c; 401 402 assert(ppos != NULL); 403 404 c = getsymbol(ppos); 405 406 if ( ! isalpha(c) && (c != '_') && (c != ':') ) 407 { 408 xmlError(ppos, "Name starting with illegal charater"); 409 return NULL; 410 } 411 412 /* The following is wrong: Here almost all characters that we casted to unicode are feasible */ 413 while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') ) 414 { 415 if ( len + 1 >= size ) 416 { 417 size += NAME_EXT_SIZE; 418 419 if ( name == NULL ) 420 { 421 ALLOC_ABORT( BMSallocMemoryArray(&name, size) ); 422 } 423 else 424 { 425 ALLOC_ABORT( BMSreallocMemoryArray(&name, size) ); 426 } 427 } 428 assert(name != NULL); 429 assert(size > len); 430 431 name[len++] = (char)c; 432 433 c = getsymbol(ppos); 434 } 435 if ( c != EOF ) 436 ungetsymbol(ppos, c); 437 438 assert(name != NULL); 439 440 if ( len == 0 ) 441 { 442 BMSfreeMemoryArray(&name); 443 name = NULL; 444 } 445 else 446 name[len] = '\0'; 447 448 return name; 449 } 450 451 /** Read the value of an attribute from the input stream. 452 * 453 * The value has to be between two " or ' (the other character is then valid as well). The function 454 * returns a pointer to allocated memory containing the value or it returns NULL in case of an 455 * error. 456 */ 457 static 458 char* getAttrval( 459 PPOS* ppos 460 ) 461 { 462 char* attr = NULL; 463 int c; 464 int stop; 465 size_t len = 0; 466 size_t size = 0; 467 468 assert(ppos != NULL); 469 470 /* The following is not allowed according to the specification (the value has to be directly 471 * after the equation sign). */ 472 c = skipSpace(ppos); 473 474 if ( (c != '"') && (c != '\'') ) 475 { 476 xmlError(ppos, "Atribute value does not start with \" or \'"); 477 return NULL; 478 } 479 stop = c; 480 481 for(;;) 482 { 483 if ( len == size ) 484 { 485 size += ATTR_EXT_SIZE; 486 487 if ( attr == NULL ) 488 { 489 ALLOC_ABORT( BMSallocMemoryArray(&attr, size) ); 490 } 491 else 492 { 493 ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) ); 494 } 495 } 496 assert(attr != NULL); 497 assert(size > len); 498 499 c = getsymbol(ppos); 500 501 if ( (c == stop) || (c == EOF) ) 502 break; 503 504 attr[len++] = (char)c; 505 } 506 507 if ( c != EOF ) 508 attr[len] = '\0'; 509 else 510 { 511 BMSfreeMemoryArray(&attr); 512 attr = NULL; 513 } 514 return attr; 515 } 516 517 /** Skip comment 518 * 519 * Return FALSE if an error occurs. 520 */ 521 static 522 XML_Bool doComment( 523 PPOS* ppos 524 ) 525 { 526 XML_Bool result = TRUE; 527 int c; 528 int state = 0; 529 530 assert(ppos != NULL); 531 532 for(;;) 533 { 534 c = getsymbol(ppos); 535 536 if ( c == EOF ) 537 break; 538 539 if ( (c == '>') && (state >= 2) ) 540 break; 541 542 state = (c == '-') ? state + 1 : 0; 543 } 544 if ( c == EOF ) 545 { 546 xmlError(ppos, "Unexpected EOF in comment"); 547 result = FALSE; 548 } 549 return result; 550 } 551 552 /** Handles a CDATA section. 553 * 554 * Returns a pointer to allocated memory containing the data of this section or NULL in case of an 555 * error. 556 */ 557 static 558 char* doCdata( 559 PPOS* ppos 560 ) 561 { 562 char* data = NULL; 563 size_t size = 0; 564 size_t len = 0; 565 int state = 0; 566 int c; 567 568 assert(ppos != NULL); 569 570 for(;;) 571 { 572 c = getsymbol(ppos); 573 574 if ( c == EOF ) 575 break; 576 577 if ( c == ']' ) 578 state++; 579 else 580 if ( (c == '>') && (state >= 2) ) 581 break; 582 else 583 state = 0; 584 585 if ( len == size ) 586 { 587 size += DATA_EXT_SIZE; 588 589 if ( data == NULL ) 590 { 591 ALLOC_ABORT( BMSallocMemoryArray(&data, size) ); 592 } 593 else 594 { 595 ALLOC_ABORT( BMSreallocMemoryArray(&data, size) ); 596 } 597 } 598 assert(data != NULL); 599 assert(size > len); 600 601 data[len++] = (char)c; 602 } 603 assert(data != NULL); 604 605 /*lint --e{527}*/ 606 if ( c != EOF ) 607 { 608 assert(len >= 2); 609 assert(data != NULL); 610 611 data[len - 2] = '\0'; /*lint !e413*/ 612 } 613 else 614 { 615 BMSfreeMemoryArray(&data); 616 data = NULL; 617 xmlError(ppos, "Unexpected EOF in CDATA"); 618 } 619 return data; 620 } 621 622 /** Handle processing instructions (skipping) */ 623 static 624 void handlePi( 625 PPOS* ppos 626 ) 627 { 628 int c; 629 630 assert(ppos != NULL); 631 assert(ppos->state == XML_STATE_BEFORE); 632 633 do 634 { 635 c = getsymbol(ppos); 636 } 637 while ( (c != EOF) && (c != '>') ); 638 639 if ( c != EOF ) 640 ppos->state = XML_STATE_PCDATA; 641 else 642 { 643 xmlError(ppos, "Unexpected EOF in PI"); 644 ppos->state = XML_STATE_ERROR; 645 } 646 } 647 648 /** Handles declarations that start with a <!. 649 * 650 * This includes comments. Does currenlty not work very well, because of DTDs. 651 */ 652 static 653 void handleDecl( 654 PPOS* ppos 655 ) 656 { 657 enum XmlSection 658 { 659 IS_COMMENT, 660 IS_ATTLIST, 661 IS_DOCTYPE, 662 IS_ELEMENT, 663 IS_ENTITY, 664 IS_NOTATION, 665 IS_CDATA 666 }; 667 typedef enum XmlSection XMLSECTION; 668 669 static struct 670 { 671 const char* name; 672 XMLSECTION what; 673 } key[] = 674 { 675 { "--", IS_COMMENT }, 676 { "ATTLIST", IS_ATTLIST }, 677 { "DOCTYPE", IS_DOCTYPE }, 678 { "ELEMENT", IS_ELEMENT }, 679 { "ENTITY", IS_ENTITY }, 680 { "NOTATION", IS_NOTATION }, 681 { "[CDATA[", IS_CDATA } 682 }; 683 XML_NODE* node; 684 char* data; 685 int c; 686 int k = 0; 687 int beg = 0; 688 int end; 689 690 assert(ppos != NULL); 691 assert(ppos->state == XML_STATE_BEFORE); 692 693 end = (int) (sizeof(key) / sizeof(key[0])) - 1; 694 do 695 { 696 c = getsymbol(ppos); 697 698 for(; (beg <= end) && (c != key[beg].name[k]); beg++) 699 ; 700 for(; (end >= beg) && (c != key[end].name[k]); end--) 701 ; 702 k++; 703 } 704 while(beg < end); 705 706 if ( beg != end ) 707 { 708 xmlError(ppos, "Unknown declaration"); 709 710 while ( (c != EOF) && (c != '>') ) 711 c = getsymbol(ppos); 712 } 713 else 714 { 715 assert(beg == end); 716 assert(beg < (int)(sizeof(key) / sizeof(*key))); 717 assert(beg >= 0); 718 719 switch(key[beg].what) 720 { 721 case IS_COMMENT : 722 if ( ! doComment(ppos) ) 723 ppos->state = XML_STATE_ERROR; 724 break; 725 case IS_CDATA : 726 if ( (data = doCdata(ppos)) == NULL ) 727 ppos->state = XML_STATE_ERROR; 728 else 729 { 730 if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) ) 731 { 732 xmlError(ppos, "Can't create new node"); 733 ppos->state = XML_STATE_ERROR; 734 } 735 else 736 { 737 BMSduplicateMemoryArray(&node->data, data, strlen(data)+1); 738 BMSfreeMemoryArray(&data); 739 xmlAppendChild(topPstack(ppos), node); 740 } 741 } 742 break; 743 case IS_ATTLIST : 744 case IS_ELEMENT : 745 case IS_NOTATION : 746 case IS_ENTITY : 747 case IS_DOCTYPE : 748 break; 749 default : 750 abort(); 751 } 752 } 753 } 754 755 /** Handle end tag */ 756 static 757 void handleEndtag( 758 PPOS* ppos 759 ) 760 { 761 char* name; 762 int c; 763 764 assert(ppos != NULL); 765 766 if ( (name = getName(ppos)) == NULL ) 767 xmlError(ppos, "Missing name in endtag"); 768 else 769 { 770 c = skipSpace(ppos); 771 772 if ( c != '>' ) 773 { 774 xmlError(ppos, "Missing '>' in endtag"); 775 ppos->state = XML_STATE_ERROR; 776 } 777 else 778 { 779 if ( strcmp(name, topPstack(ppos)->name) ) 780 { 781 xmlError(ppos, "Name of endtag does not match starttag"); 782 ppos->state = XML_STATE_ERROR; 783 } 784 else 785 { 786 if ( popPstack(ppos) ) 787 ppos->state = XML_STATE_PCDATA; 788 else 789 ppos->state = XML_STATE_ERROR; 790 } 791 } 792 793 BMSfreeMemoryArray(&name); 794 } 795 } 796 797 /** Handle start tag */ 798 static 799 void handleStarttag( 800 PPOS* ppos 801 ) 802 { 803 XML_NODE* node; 804 char* name; 805 806 assert(ppos != NULL); 807 808 name = getName(ppos); 809 if ( name == NULL ) 810 { 811 xmlError(ppos, "Missing name in tagstart"); 812 ppos->state = XML_STATE_ERROR; 813 } 814 else 815 { 816 node = xmlNewNode(name, ppos->lineno); 817 if ( node == NULL ) 818 { 819 xmlError(ppos, "Can't create new node"); 820 ppos->state = XML_STATE_ERROR; 821 } 822 else 823 { 824 xmlAppendChild(topPstack(ppos), node); 825 826 if ( pushPstack(ppos, node) ) 827 ppos->state = XML_STATE_IN_TAG; 828 else 829 ppos->state = XML_STATE_ERROR; 830 } 831 BMSfreeMemoryArray(&name); 832 } 833 } 834 835 /** Checks for next tag */ 836 static 837 void procBefore( 838 PPOS* ppos /**< input stream position */ 839 ) 840 { 841 int c; 842 843 assert(ppos != NULL); 844 assert(ppos->state == XML_STATE_BEFORE); 845 846 c = skipSpace(ppos); 847 848 if ( c != '<' ) 849 { 850 xmlError(ppos, "Expecting '<'"); 851 ppos->state = XML_STATE_ERROR; 852 } 853 else 854 { 855 c = getsymbol(ppos); 856 857 switch(c) 858 { 859 case EOF : 860 xmlError(ppos, "Unexpected EOF"); 861 ppos->state = XML_STATE_ERROR; 862 break; 863 case '!' : 864 handleDecl(ppos); 865 break; 866 case '?' : 867 handlePi(ppos); 868 break; 869 case '/' : 870 handleEndtag(ppos); 871 break; 872 default : 873 ungetsymbol(ppos, c); 874 handleStarttag(ppos); 875 break; 876 } 877 } 878 } 879 880 /** Process tag */ 881 static 882 void procInTag( 883 PPOS* ppos /**< input stream position */ 884 ) 885 { 886 XML_ATTR* attr; 887 int c; 888 XML_Bool empty = FALSE; 889 char* name; 890 char* value; 891 892 assert(ppos != NULL); 893 assert(ppos->state == XML_STATE_IN_TAG); 894 895 c = skipSpace(ppos); 896 897 if ( (c == '/') || (c == '>') || (c == EOF) ) 898 { 899 if ( c == '/' ) 900 { 901 empty = TRUE; 902 c = getsymbol(ppos); 903 } 904 905 if ( c == EOF ) 906 { 907 xmlError(ppos, "Unexpected EOF while in a tag"); 908 ppos->state = XML_STATE_ERROR; 909 } 910 911 if ( c == '>' ) 912 { 913 ppos->state = XML_STATE_PCDATA; 914 915 if (empty && ! popPstack(ppos)) 916 ppos->state = XML_STATE_ERROR; 917 } 918 else 919 { 920 xmlError(ppos, "Expected tag end marker '>'"); 921 ppos->state = XML_STATE_ERROR; 922 } 923 } 924 else 925 { 926 ungetsymbol(ppos, c); 927 928 name = getName(ppos); 929 if ( name == NULL ) 930 { 931 xmlError(ppos, "No name for attribute"); 932 ppos->state = XML_STATE_ERROR; 933 } 934 else 935 { 936 c = skipSpace(ppos); 937 938 if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) ) 939 { 940 xmlError(ppos, "Missing attribute value"); 941 ppos->state = XML_STATE_ERROR; 942 BMSfreeMemoryArray(&name); 943 } 944 else 945 { 946 attr = xmlNewAttr(name, value); 947 if ( attr == NULL ) 948 { 949 xmlError(ppos, "Can't create new attribute"); 950 ppos->state = XML_STATE_ERROR; 951 } 952 else 953 { 954 xmlAddAttr(topPstack(ppos), attr); 955 } 956 BMSfreeMemoryArray(&name); 957 BMSfreeMemoryArray(&value); 958 } 959 } 960 } 961 } 962 963 /* Handles PCDATA */ 964 static 965 void procPcdata( 966 PPOS* ppos /**< input stream position */ 967 ) 968 { 969 XML_NODE* node; 970 char* data = NULL; 971 size_t size = 0; 972 size_t len = 0; 973 int c; 974 975 assert(ppos != NULL); 976 assert(ppos->state == XML_STATE_PCDATA); 977 978 #ifndef SPEC_LIKE_SPACE_HANDLING 979 c = skipSpace(ppos); 980 if ( c != EOF ) 981 ungetsymbol(ppos, c); 982 #endif 983 c = getsymbol(ppos); 984 985 while ( (c != EOF) && (c != '<') ) 986 { 987 if ( len + 1 >= size ) /* leave space for terminating '\0' */ 988 { 989 size += DATA_EXT_SIZE; 990 991 if ( data == NULL ) 992 { 993 ALLOC_ABORT( BMSallocMemoryArray(&data, size) ); 994 } 995 else 996 { 997 ALLOC_ABORT( BMSreallocMemoryArray(&data, size) ); 998 } 999 } 1000 assert(data != NULL); 1001 assert(size > len + 1); 1002 1003 data[len++] = (char)c; 1004 1005 c = getsymbol(ppos); 1006 } 1007 if ( data == NULL ) 1008 { 1009 if ( c == EOF ) 1010 ppos->state = XML_STATE_EOF; 1011 else 1012 { 1013 assert(c == '<'); 1014 ppos->state = XML_STATE_BEFORE; 1015 ungetsymbol(ppos, c); 1016 } 1017 } 1018 else 1019 { 1020 assert(len < size); 1021 data[len] = '\0'; 1022 1023 if ( c == EOF ) 1024 ppos->state = XML_STATE_ERROR; 1025 else 1026 { 1027 ungetsymbol(ppos, c); 1028 1029 node = xmlNewNode("#PCDATA", ppos->lineno); 1030 if ( node == NULL ) 1031 { 1032 xmlError(ppos, "Can't create new node"); 1033 ppos->state = XML_STATE_ERROR; 1034 } 1035 else 1036 { 1037 BMSduplicateMemoryArray(&node->data, data, strlen(data)+1); 1038 xmlAppendChild(topPstack(ppos), node); 1039 ppos->state = XML_STATE_BEFORE; 1040 } 1041 } 1042 1043 BMSfreeMemoryArray(&data); 1044 } 1045 } 1046 1047 /** Parse input stream */ 1048 static 1049 XML_Bool xmlParse( 1050 PPOS* ppos /**< input stream position */ 1051 ) 1052 { 1053 XML_Bool ok = TRUE; 1054 1055 while (ok) 1056 { 1057 debugMessage("state=%d\n", ppos->state); 1058 1059 switch (ppos->state) 1060 { 1061 case XML_STATE_BEFORE : 1062 procBefore(ppos); 1063 break; 1064 case XML_STATE_IN_TAG : 1065 procInTag(ppos); 1066 break; 1067 case XML_STATE_PCDATA : 1068 procPcdata(ppos); 1069 break; 1070 case XML_STATE_EOF : 1071 ok = FALSE; 1072 break; 1073 case XML_STATE_ERROR : 1074 ok = FALSE; 1075 break; 1076 default : 1077 xmlError(ppos, "Internal Error, illegal state"); 1078 ok = FALSE; 1079 } 1080 } 1081 return (ppos->state == XML_STATE_EOF); 1082 } 1083 1084 /** Parse file */ 1085 XML_NODE* xmlProcess( 1086 const char* filename /**< XML file name */ 1087 ) 1088 { 1089 PPOS ppos; 1090 XML_NODE* node = NULL; 1091 XML_ATTR* attr; 1092 XML_Bool result = FALSE; 1093 char* myfilename; 1094 size_t filenamelen; 1095 1096 /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */ 1097 assert( filename != NULL ); 1098 filenamelen = strlen(filename); 1099 if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL ) 1100 return NULL; 1101 BMScopyMemoryArray(myfilename, filename, filenamelen + 1); 1102 1103 #ifdef SCIP_WITH_ZLIB 1104 if ( access(filename, R_OK) != 0 ) 1105 { 1106 strcat(myfilename, ".gz"); 1107 1108 /* If .gz also does not work, revert to the old name 1109 * to get a better error message. 1110 */ 1111 if ( access(myfilename, R_OK) != 0 ) 1112 (void)SCIPstrncpy(myfilename, filename, (int)filenamelen + 5); 1113 } 1114 #endif 1115 ppos.fp = FOPEN(myfilename, "r"); 1116 if ( ppos.fp == NULL ) 1117 perror(myfilename); 1118 else 1119 { 1120 ppos.filename = myfilename; 1121 ppos.buf[0] = '\0'; 1122 ppos.pos = 0; 1123 ppos.lineno = 1; 1124 ppos.nextsym = 0; 1125 ppos.lastsym = 0; 1126 ppos.state = XML_STATE_BEFORE; 1127 ppos.top = NULL; 1128 1129 node = xmlNewNode("#ROOT", ppos.lineno); 1130 if ( node == NULL ) 1131 { 1132 xmlError(&ppos, "Can't create new node"); 1133 } 1134 else 1135 { 1136 attr = xmlNewAttr("filename", myfilename); 1137 if ( attr == NULL ) 1138 xmlError(&ppos, "Can't create new attribute"); 1139 else 1140 { 1141 xmlAddAttr(node, attr); 1142 1143 /* push root node on stack and start to process */ 1144 if ( pushPstack(&ppos, node) ) 1145 { 1146 result = xmlParse(&ppos); 1147 1148 clearPstack(&ppos); 1149 } 1150 } 1151 } 1152 1153 if ( ! result && (node != NULL) ) 1154 { 1155 xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__); 1156 xmlFreeNode(node); 1157 node = NULL; 1158 } 1159 if ( FCLOSE(ppos.fp) ) 1160 perror(myfilename); 1161 } 1162 BMSfreeMemoryArray(&myfilename); 1163 1164 return node; 1165 } 1166 1167 1168 1169 1170 1171 1172 /*----------------------------------------------------------------------------------------------*/ 1173 1174 1175 /** create new node */ 1176 XML_NODE* xmlNewNode( 1177 const char* name, 1178 int lineno 1179 ) 1180 { 1181 XML_NODE* n = NULL; 1182 1183 assert(name != NULL); 1184 1185 if ( BMSallocMemory(&n) != NULL ) 1186 { 1187 BMSclearMemory(n); 1188 BMSduplicateMemoryArray(&n->name, name, strlen(name)+1); 1189 n->lineno = lineno; 1190 } 1191 return n; 1192 } 1193 1194 /** create new attribute */ 1195 XML_ATTR* xmlNewAttr( 1196 const char* name, 1197 const char* value 1198 ) 1199 { 1200 XML_ATTR* a = NULL; 1201 1202 assert(name != NULL); 1203 assert(value != NULL); 1204 1205 if ( BMSallocMemory(&a) != NULL ) 1206 { 1207 BMSclearMemory(a); 1208 BMSduplicateMemoryArray(&a->name, name, strlen(name)+1); 1209 BMSduplicateMemoryArray(&a->value, value, strlen(value)+1); 1210 } 1211 return a; 1212 } 1213 1214 /** add attribute */ 1215 void xmlAddAttr( 1216 XML_NODE* n, 1217 XML_ATTR* a 1218 ) 1219 { 1220 assert(n != NULL); 1221 assert(a != NULL); 1222 1223 a->next = n->attrlist; 1224 n->attrlist = a; 1225 } 1226 1227 /** append child node */ 1228 void xmlAppendChild( 1229 XML_NODE* parent, 1230 XML_NODE* child 1231 ) 1232 { 1233 assert(parent != NULL); 1234 assert(child != NULL); 1235 1236 child->parent = parent; 1237 child->prevsibl = parent->lastchild; 1238 child->nextsibl = NULL; 1239 parent->lastchild = child; 1240 1241 if ( child->prevsibl != NULL ) 1242 child->prevsibl->nextsibl = child; 1243 1244 if ( parent->firstchild == NULL ) 1245 parent->firstchild = child; 1246 } 1247 1248 /** free attribute */ 1249 static 1250 void xmlFreeAttr( 1251 XML_ATTR* attr 1252 ) 1253 { 1254 XML_ATTR* a; 1255 1256 /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances 1257 * and might overflow the heap. */ 1258 a = attr; 1259 while (a != NULL) 1260 { 1261 XML_ATTR* b; 1262 b = a->next; 1263 1264 assert(a->name != NULL); 1265 assert(a->value != NULL); 1266 1267 BMSfreeMemoryArray(&a->name); 1268 BMSfreeMemoryArray(&a->value); 1269 BMSfreeMemory(&a); 1270 a = b; 1271 } 1272 } 1273 1274 /** free node */ 1275 void xmlFreeNode( 1276 XML_NODE* node 1277 ) 1278 { 1279 XML_NODE* n; 1280 1281 if ( node == NULL ) 1282 return; 1283 1284 /* Free data from back to front (because free is faster this way). */ 1285 /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances 1286 * and might overflow the heap. */ 1287 n = node->lastchild; 1288 while ( n != NULL ) 1289 { 1290 XML_NODE* m; 1291 m = n->prevsibl; 1292 xmlFreeNode(n); 1293 n = m; 1294 } 1295 1296 xmlFreeAttr(node->attrlist); 1297 1298 if ( node->data != NULL ) 1299 { 1300 BMSfreeMemoryArray(&node->data); 1301 } 1302 assert(node->name != NULL); 1303 1304 BMSfreeMemoryArray(&node->name); 1305 BMSfreeMemory(&node); 1306 } 1307 1308 /** output node */ 1309 void xmlShowNode( 1310 const XML_NODE* root 1311 ) 1312 { 1313 const XML_NODE* n; 1314 const XML_ATTR* a; 1315 1316 assert(root != NULL); 1317 1318 for (n = root; n != NULL; n = n->nextsibl) 1319 { 1320 infoMessage("Name: %s\n", n->name); 1321 infoMessage("Line: %d\n", n->lineno); 1322 infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***"); 1323 1324 for (a = n->attrlist; a != NULL; a = a->next) 1325 infoMessage("Attr: %s = [%s]\n", a->name, a->value); 1326 1327 if ( n->firstchild != NULL ) 1328 { 1329 infoMessage("->\n"); 1330 xmlShowNode(n->firstchild); 1331 infoMessage("<-\n"); 1332 } 1333 } 1334 } 1335 1336 /** get attribute value */ 1337 const char* xmlGetAttrval( 1338 const XML_NODE* node, 1339 const char* name 1340 ) 1341 { 1342 XML_ATTR* a; 1343 1344 assert(node != NULL); 1345 assert(name != NULL); 1346 1347 for (a = node->attrlist; a != NULL; a = a->next) 1348 { 1349 if ( ! strcmp(name, a->name) ) 1350 break; 1351 } 1352 1353 #ifdef SCIP_DEBUG 1354 if (a == NULL) 1355 infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name); 1356 #endif 1357 1358 return (a == NULL) ? NULL : a->value; 1359 } 1360 1361 /** return first node */ 1362 const XML_NODE* xmlFirstNode( 1363 const XML_NODE* node, 1364 const char* name 1365 ) 1366 { 1367 const XML_NODE* n; 1368 1369 assert(node != NULL); 1370 assert(name != NULL); 1371 1372 for (n = node; n != NULL; n = n->nextsibl) 1373 { 1374 if ( ! strcmp(name, n->name) ) 1375 break; 1376 } 1377 1378 return n; 1379 } 1380 1381 /** return next node */ 1382 const XML_NODE* xmlNextNode( 1383 const XML_NODE* node, 1384 const char* name 1385 ) 1386 { 1387 assert(node != NULL); 1388 assert(name != NULL); 1389 1390 return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name); 1391 } 1392 1393 /** find node */ 1394 const XML_NODE* xmlFindNode( 1395 const XML_NODE* node, 1396 const char* name 1397 ) 1398 { 1399 const XML_NODE* n; 1400 const XML_NODE* r; 1401 1402 assert(node != NULL); 1403 assert(name != NULL); 1404 1405 if ( ! strcmp(name, node->name) ) 1406 return node; 1407 1408 for (n = node->firstchild; n != NULL; n = n->nextsibl) 1409 { 1410 r = xmlFindNode(n, name); 1411 if ( r != NULL ) 1412 return r; 1413 } 1414 1415 return NULL; 1416 } 1417 1418 /** find node with bound on the depth */ 1419 const XML_NODE* xmlFindNodeMaxdepth( 1420 const XML_NODE* node, /**< current node - use start node to begin */ 1421 const char* name, /**< name of tag to search for */ 1422 int depth, /**< current depth - start with 0 for root */ 1423 int maxdepth /**< maximal depth */ 1424 ) 1425 { 1426 const XML_NODE* n; 1427 const XML_NODE* r; 1428 1429 assert(node != NULL); 1430 assert(name != NULL); 1431 1432 if ( ! strcmp(name, node->name) ) 1433 return node; 1434 1435 if ( depth < maxdepth ) 1436 { 1437 for (n = node->firstchild; n != NULL; n = n->nextsibl) 1438 { 1439 r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth); 1440 if ( r != NULL ) 1441 return r; 1442 } 1443 } 1444 1445 return NULL; 1446 } 1447 1448 /** return next sibling */ 1449 const XML_NODE* xmlNextSibl( 1450 const XML_NODE* node 1451 ) 1452 { 1453 assert(node != NULL); 1454 1455 return node->nextsibl; 1456 } 1457 1458 /** return previous sibling */ 1459 const XML_NODE* xmlPrevSibl( 1460 const XML_NODE* node 1461 ) 1462 { 1463 assert(node != NULL); 1464 1465 return node->prevsibl; 1466 } 1467 1468 /** return first child */ 1469 const XML_NODE* xmlFirstChild( 1470 const XML_NODE* node 1471 ) 1472 { 1473 assert(node != NULL); 1474 1475 return node->firstchild; 1476 } 1477 1478 /** return last child */ 1479 const XML_NODE* xmlLastChild( 1480 const XML_NODE* node 1481 ) 1482 { 1483 assert(node != NULL); 1484 1485 return node->lastchild; 1486 } 1487 1488 /** return name of node */ 1489 const char* xmlGetName( 1490 const XML_NODE* node 1491 ) 1492 { 1493 assert(node != NULL); 1494 1495 return node->name; 1496 } 1497 1498 /** get line number */ 1499 int xmlGetLine( 1500 const XML_NODE* node 1501 ) 1502 { 1503 assert(node != NULL); 1504 1505 return node->lineno; 1506 } 1507 1508 /** get data */ 1509 const char* xmlGetData( 1510 const XML_NODE* node 1511 ) 1512 { 1513 assert(node != NULL); 1514 1515 return node->data; 1516 } 1517 1518 /** find PCDATA */ 1519 const char* xmlFindPcdata( 1520 const XML_NODE* node, 1521 const char* name 1522 ) 1523 { 1524 const XML_NODE* n; 1525 1526 assert(node != NULL); 1527 assert(name != NULL); 1528 1529 n = xmlFindNode(node, name); 1530 if ( n == NULL ) 1531 return NULL; 1532 1533 if ( ! strcmp(n->firstchild->name, "#PCDATA") ) 1534 return n->firstchild->data; 1535 1536 return NULL; 1537 } 1538