/*
 *
 * Copyright (C) 1987 Pehong Chen (phc@renoir.berkeley.edu)
 * Computer Science Division
 * University of California, Berkeley
 *
 */

#include "mkind.h"
#include "scanid.h"

int     idx_lc;                 /* line count */
int     idx_tc;                 /* total entry count */
int     idx_ec;                 /* erroneous entry count */
int     idx_dc;                 /* number of dots printed so far */

static int first_entry = TRUE;  /* TRUE until the first node is linked */
static int comp_len;            /* strlen(page_comp), cached by scan_idx() */
static char key[ARGUMENT_MAX];  /* raw first argument, filled by scan_arg1() */
static char no[NUMBER_MAX];     /* raw second argument, filled by scan_arg2() */

NODE_PTR head;                  /* first node of the entry list */
NODE_PTR tail;                  /* last node of the entry list */

/*
 * Forward declarations.  The static helpers are called before they are
 * defined; without these they would be implicitly declared as external
 * functions and then clash with their later static definitions.
 * Old-style (empty) parameter lists are used to match the K&R
 * definitions in this file.
 */
int     group_type();
static void skip_to_eol();
static int make_key();
static int scan_key();
static int scan_field();
static int scan_no();
static int scan_arabic();
static int scan_roman_lower();
static int scan_roman_upper();
static int scan_alpha_lower();
static int scan_alpha_upper();
static int scan_arg1();
static int scan_arg2();

/*
 * Read idx_fp character by character and turn every well-formed line
 *
 *      <keyword> <aopen> key <aclose> <aopen> page <aclose>
 *
 * into a node appended to the head/tail list (via make_key()).
 *
 * arg_count is the state of the line being parsed:
 *      -1  nothing seen yet on this line
 *       0  collecting the keyword
 *       1  first argument read, waiting for the second opening delimiter
 *       2  both arguments read, waiting for end of line
 *
 * On return the per-file counters have been folded into idx_tt / idx_et
 * and idx_fp has been closed.
 *
 * NOTE(review): IDX_SKIPLINE, IDX_ERROR, IDX_ERROR2 and IDX_DOT come
 * from scanid.h, which is not visible here.  IDX_SKIPLINE presumably
 * discards the rest of the line and resets arg_count, because no branch
 * using it resets the state itself -- confirm against the header.
 */
void
scan_idx()
{
    char    keyword[STRING_MAX];
    int     c;
    int     i = 0;
    int     not_eof = TRUE;
    int     arg_count = -1;

    MESSAGE("Scanning input file %s...", idx_fn);
    idx_lc = idx_tc = idx_ec = idx_dc = 0;
    comp_len = strlen(page_comp);

    while (not_eof) {
        switch (c = GET_CHAR(idx_fp)) {
        case EOF:
            if (arg_count == 2) {
                /* last line is complete but has no trailing LFD */
                idx_lc++;
                if (make_key())
                    IDX_DOT(DOT_MAX);
                arg_count = -1;
            } else
                not_eof = FALSE;
            break;

        case LFD:
            idx_lc++;
            if (arg_count == 2) {
                if (make_key())
                    IDX_DOT(DOT_MAX);
                arg_count = -1;
            } else if (arg_count > -1) {
                IDX_ERROR("Missing arguments -- need two (premature LFD).\n", NULL);
                arg_count = -1;
            }
            /* FALLTHROUGH: nothing more to do, same as blanks */
        case TAB:
        case SPC:
            break;

        default:
            switch (arg_count) {
            case -1:
                /* first character of a new keyword */
                i = 0;
                keyword[i++] = (char) c;
                arg_count++;
                idx_tc++;
                break;

            case 0:
                if (c == idx_aopen) {
                    arg_count++;
                    /* i <= STRING_MAX - 1 here, so this stays in bounds */
                    keyword[i] = '\0';
                    if (STREQ(keyword, idx_keyword)) {
                        if (! scan_arg1()) {
                            arg_count = -1;
                        }
                    } else {
                        IDX_SKIPLINE;
                        IDX_ERROR("Unknown index keyword %s.\n", keyword);
                    }
                } else {
                    /* keep one byte free for the terminator */
                    if (i < STRING_MAX - 1)
                        keyword[i++] = (char) c;
                    else {
                        /* terminate before the keyword is printed */
                        keyword[i] = '\0';
                        IDX_SKIPLINE;
                        IDX_ERROR2("Index keyword %s too long (max %d).\n",
                                   keyword, STRING_MAX - 1);
                    }
                }
                break;

            case 1:
                if (c == idx_aopen) {
                    arg_count++;
                    if (! scan_arg2()) {
                        arg_count = -1;
                    }
                } else {
                    IDX_SKIPLINE;
                    IDX_ERROR("No opening delimiter for second argument (illegal char `%c').\n", c);
                }
                break;

            case 2:
                IDX_SKIPLINE;
                IDX_ERROR("No closing delimiter for second argument (illegal character `%c').\n", c);
                break;
            }
            break;
        }
    }

    /* fixup the total counts */
    idx_tt += idx_tc;
    idx_et += idx_ec;

    DONE(idx_tc - idx_ec, "entries accepted", idx_ec, "rejected");
    CLOSE(idx_fp);
}

/*
 * Discard input up to and including the next LFD, stopping at EOF as
 * well so that a truncated file cannot make the scanner spin forever.
 */
static void
skip_to_eol()
{
    int     a;

    while (((a = GET_CHAR(idx_fp)) != LFD) && (a != EOF))
        ;
}

/*
 * Build a list node from the current contents of key[] and no[] and
 * append it to the head/tail list.
 *
 * Returns TRUE when the entry was accepted, FALSE when scan_key() or
 * scan_no() rejected it.  A rejected node is released again; it has not
 * been linked into the list at that point, so nothing else refers to it.
 *
 * NOTE(review): free() is called with a (char *) cast and relies on
 * mkind.h (or the compiler's implicit declaration rules) for its
 * declaration, the same way malloc() is obtained in this file.
 */
static int
make_key()
{
    NODE_PTR ptr;
    int     i;

    /* allocate and initialize a node */
    if ((ptr = (NODE_PTR) malloc(sizeof(NODE))) == NULL)
        FATAL("Not enough core...abort.\n", "");

    for (i = 0; i < FIELD_MAX; i++) {
        ptr->data.sf[i][0] = '\0';
        ptr->data.af[i][0] = '\0';
    }
    ptr->data.lpg[0] = ptr->data.encap[0] = '\0';
    ptr->data.count = 0;
    ptr->data.type = EMPTY;

    /* process index key */
    if (! scan_key(&(ptr->data))) {
        free((char *) ptr);
        return (FALSE);
    }

    /* determine group type */
    ptr->data.group = group_type(ptr->data.sf[0]);

    /* process page number; lpg keeps the literal text */
    strcpy(ptr->data.lpg, no);
    if (! scan_no(no, ptr->data.npg, &(ptr->data.count), &(ptr->data.type))) {
        free((char *) ptr);
        return (FALSE);
    }

    if (first_entry) {
        head = tail = ptr;
        first_entry = FALSE;
    } else {
        tail->next = ptr;
        tail = ptr;
    }
    ptr->data.lc = idx_lc;
    ptr->data.fn = idx_fn;
    tail->next = NULL;

    return (TRUE);
}

/*
 * Split key[] into its components and store them in *data:
 *      sf[i]   sort field of nesting level i
 *      af[i]   actual field of level i (text following idx_actual)
 *      encap   text following idx_encap (always the last component)
 *
 * Levels are separated by idx_level.  At the deepest level
 * (FIELD_MAX - 1) a further idx_level is not accepted as a separator,
 * neither in the sort field nor in the actual field, so the level index
 * can never run past the sf[] / af[] arrays.
 *
 * Returns TRUE on success.  Returns FALSE after an error has been
 * reported, either by scan_field() or through NULL_RTN when a required
 * field is empty.
 */
static int
scan_key(data)
FIELD_PTR data;
{
    int     i = 0;              /* current level */
    int     n = 0;              /* index to the key[] array */
    int     second_round = FALSE;
    int     last = FIELD_MAX - 1;
    int     ck_level;           /* may idx_level end the current field? */

    while (TRUE) {
        if (key[n] == '\0')
            break;

        if (key[n] == idx_encap) {
            n++;
            if (scan_field(&n, data->encap, (int) sizeof(data->encap),
                           FALSE, FALSE, FALSE))
                break;
            else
                return (FALSE);
        }

        ck_level = (i == last) ? FALSE : TRUE;

        if (key[n] == idx_actual) {
            n++;
            if (! scan_field(&n, data->af[i], (int) sizeof(data->af[i]),
                              ck_level, TRUE, FALSE))
                return (FALSE);
        } else {
            /* Next nesting level */
            if (second_round) {
                i++;
                n++;
                ck_level = (i == last) ? FALSE : TRUE;
            }
            if (! scan_field(&n, data->sf[i], (int) sizeof(data->sf[i]),
                              ck_level, TRUE, TRUE))
                return (FALSE);
            second_round = TRUE;
        }
    }

    /* check for empty fields which shouldn't be empty */
    if (*data->sf[0] == '\0') {
        NULL_RTN;
    }
    for (i = 1; i < FIELD_MAX-1; i++)
        if ((*data->sf[i] == '\0') &&
            ((*data->af[i] != '\0') || (*data->sf[i+1] != '\0'))) {
            NULL_RTN;
        }
    /* i == FIELD_MAX-1 */
    if ((*data->sf[i] == '\0') && (*data->af[i] != '\0')) {
        NULL_RTN;
    }

    return (TRUE);
}

/*
 * Copy one field out of key[], starting at key[*n], into field[].
 *
 *      n          in/out index into key[]; on success it is left on the
 *                 character that ended the field
 *      field      destination buffer
 *      len_field  size of field[] in bytes, including the terminator
 *      ck_level   TRUE if idx_level ends the field
 *      ck_encap   TRUE if idx_encap ends the field
 *      ck_actual  TRUE if idx_actual ends the field
 *
 * A special character whose ck_* flag is FALSE is an error inside the
 * field.  idx_escape characters are copied through and counted; an
 * idx_quote preceded by an even number of them makes the following
 * character literal (the quote itself is dropped), otherwise the quote
 * is copied as is.  With compress_blanks set, one leading and one
 * trailing blank of the field are removed.
 *
 * Returns TRUE on success, FALSE after reporting an error.
 */
static int
scan_field(n, field, len_field, ck_level, ck_encap, ck_actual)
int    *n;
char    field[];
int     len_field;
int     ck_level;
int     ck_encap;
int     ck_actual;
{
    int     i = 0;
    int     nbsh;               /* backslash count */

    /*
     * Strip the blank that leads the field.  This must happen once,
     * before the loop: doing it on every iteration would delete the
     * blanks between words as well.
     */
    if (compress_blanks && (key[*n] == SPC))
        ++*n;

    while (TRUE) {
        nbsh = 0;
        while (key[*n] == idx_escape) {
            if (i >= len_field - 1)
                goto too_long;
            nbsh++;
            field[i++] = key[*n];
            ++*n;
        }

        if (key[*n] == idx_quote) {
            if (i >= len_field - 1)
                goto too_long;
            /*
             * A quote at the very end of the key has nothing to quote;
             * keep it literally rather than copying the terminator and
             * stepping past the end of the key.
             */
            if ((nbsh%2 == 0) && (key[*n + 1] != '\0'))
                field[i++] = key[++*n];
            else
                field[i++] = key[*n];
        } else if ((ck_level && (key[*n] == idx_level)) ||
                   (ck_encap && (key[*n] == idx_encap)) ||
                   (ck_actual && (key[*n] == idx_actual)) ||
                   (key[*n] == '\0')) {
            /* end of field: terminate, dropping one trailing blank */
            if ((i > 0) && compress_blanks && (field[i-1] == SPC))
                field[i-1] = '\0';
            else
                field[i] = '\0';
            return (TRUE);
        } else {
            if (i >= len_field - 1)
                goto too_long;
            field[i++] = key[*n];
            if ((! ck_level) && (key[*n] == idx_level)) {
                IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
                           idx_level, *n+1);
                return (FALSE);
            } else if ((! ck_encap) && (key[*n] == idx_encap)) {
                IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
                           idx_encap, *n+1);
                return (FALSE);
            } else if ((! ck_actual) && (key[*n] == idx_actual)) {
                IDX_ERROR2("Extra `%c' at position %d of first argument.\n",
                           idx_actual, *n+1);
                return (FALSE);
            }
        }
        ++*n;
    }

too_long:
    IDX_ERROR("Field of first argument too long (max %d).\n", len_field - 1);
    return (FALSE);
}

/*
 * Classify a sort key.  If str satisfies ISDIGIT from its first
 * character to its end, the result is whatever sscanf "%d" reads from
 * it (an empty string yields 0, because the conversion fails and i is
 * still 0).  Otherwise the result is SYMBOL when ISSYMBOL(str[0])
 * holds, and ALPHA in all other cases.
 */
int
group_type(str)
char   *str;
{
    int     i = 0;

    while ((str[i] != '\0') && ISDIGIT(str[i]))
        i++;

    if (str[i] == '\0') {
        sscanf(str, "%d", &i);
        return (i);
    } else if (ISSYMBOL(str[0]))
        return (SYMBOL);
    else
        return (ALPHA);
}

/*
 * Parse the page number text no[] and append its numeric field(s) to
 * npg[] / *count.  *type receives the numbering style chosen from the
 * first character: ARAB, ROML, ROMU, ALPL or ALPU.
 *
 * Returns TRUE on success, FALSE after reporting an error.
 *
 * NOTE(review): the local i is not read directly; it is presumably
 * consumed by the IS_COMPOSITOR macro from scanid.h, which then tests
 * whether the page compositor starts at no[1] -- confirm against the
 * header.  Do not rename no, i, npg or count in these functions without
 * checking IS_COMPOSITOR and ENTER.
 */
static int
scan_no(no, npg, count, type)
char    no[];
short   npg[];
short  *count;
short  *type;
{
    int     i = 1;

    if (isdigit((unsigned char) no[0])) {
        *type = ARAB;
        if (! scan_arabic(no, npg, count))
            return (FALSE);
    /* simple heuristic to determine if a letter is Roman or Alpha */
    } else if (IS_ROMAN_LOWER(no[0]) && (! IS_COMPOSITOR)) {
        *type = ROML;
        if (! scan_roman_lower(no, npg, count))
            return (FALSE);
    /* simple heuristic to determine if a letter is Roman or Alpha */
    } else if (IS_ROMAN_UPPER(no[0]) &&
               ((no[0] == ROMAN_I) || (! IS_COMPOSITOR))) {
        *type = ROMU;
        if (! scan_roman_upper(no, npg, count))
            return (FALSE);
    } else if (IS_ALPHA_LOWER(no[0])) {
        *type = ALPL;
        if (! scan_alpha_lower(no, npg, count))
            return (FALSE);
    } else if (IS_ALPHA_UPPER(no[0])) {
        *type = ALPU;
        if (! scan_alpha_upper(no, npg, count))
            return (FALSE);
    } else {
        IDX_ERROR("Illegal page number %s.\n", no);
        return (FALSE);
    }
    return (TRUE);
}

/*
 * Parse a run of decimal digits at the start of no[], enter its value
 * plus page_offset[ARAB] through ENTER, and, if IS_COMPOSITOR holds at
 * the end of the run, continue with scan_no() on the text that follows
 * comp_len characters later.  The type reported for that continuation
 * is written into the scratch variable i and discarded.
 *
 * Returns TRUE on success, FALSE after reporting an error.
 */
static int
scan_arabic(no, npg, count)
char    no[];
short   npg[];
short  *count;
{
    short   i = 0;
    char    str[ARABIC_MAX];

    while ((no[i] != '\0') && (i < ARABIC_MAX) && (! IS_COMPOSITOR)) {
        if (isdigit((unsigned char) no[i])) {
            str[i] = no[i];
            i++;
        } else {
            IDX_ERROR2("Illegal Arabic digit: position %d in %s.\n", i+1, no);
            return (FALSE);
        }
    }
    if (i >= ARABIC_MAX) {
        IDX_ERROR2("Arabic page number %s too big (max %d digits).\n",
                   no, ARABIC_MAX);
        return (FALSE);
    }
    str[i] = '\0';

    ENTER(strtoint(str) + page_offset[ARAB]);

    if (IS_COMPOSITOR)
        return (scan_no(&no[i+comp_len], npg, count, &i));
    else
        return (TRUE);
}

/*
 * Parse a lower-case Roman numeral at the start of no[].
 *
 * Digits are consumed left to right with one digit of look-behind
 * (prev).  When the held digit is smaller than the new one the pair is
 * combined as new - prev; otherwise the held digit is added to the
 * running total inp and the new one becomes the held digit.  The last
 * held digit is added after the loop.  A character for which
 * ROMAN_LOWER_VAL yields 0 is rejected.
 *
 * The value plus page_offset[ROML] is entered through ENTER; if
 * IS_COMPOSITOR holds afterwards, parsing continues with scan_no().
 *
 * Returns TRUE on success, FALSE after reporting an error.
 */
static int
scan_roman_lower(no, npg, count)
char    no[];
short   npg[];
short  *count;
{
    short   i = 0;
    int     inp = 0;
    int     prev = 0;
    int     new;

    while ((no[i] != '\0') && (i < ROMAN_MAX) && (! IS_COMPOSITOR)) {
        if ((IS_ROMAN_LOWER(no[i])) && (new = ROMAN_LOWER_VAL(no[i]))) {
            if (prev == 0)
                prev = new;
            else {
                if (prev < new) {
                    prev = new - prev;
                    new = 0;
                }
                inp += prev;
                prev = new;
            }
        } else {
            IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i+1, no);
            return (FALSE);
        }
        i++;
    }
    if (i == ROMAN_MAX) {
        IDX_ERROR2("Roman page number %s too big (max %d digits).\n",
                   no, ROMAN_MAX);
        return(FALSE);
    }
    inp += prev;

    ENTER(inp + page_offset[ROML]);

    if (IS_COMPOSITOR)
        return (scan_no(&no[i+comp_len], npg, count, &i));
    else
        return (TRUE);
}

/*
 * Upper-case counterpart of scan_roman_lower(): same algorithm, using
 * IS_ROMAN_UPPER / ROMAN_UPPER_VAL and page_offset[ROMU].
 *
 * Returns TRUE on success, FALSE after reporting an error.
 */
static int
scan_roman_upper(no, npg, count)
char    no[];
short   npg[];
short  *count;
{
    short   i = 0;
    int     inp = 0;
    int     prev = 0;
    int     new;

    while ((no[i] != '\0') && (i < ROMAN_MAX) && (! IS_COMPOSITOR)) {
        if ((IS_ROMAN_UPPER(no[i])) && (new = ROMAN_UPPER_VAL(no[i]))) {
            if (prev == 0)
                prev = new;
            else {
                if (prev < new) {
                    prev = new - prev;
                    new = 0;
                }
                inp += prev;
                prev = new;
            }
        } else {
            IDX_ERROR2("Illegal Roman number: position %d in %s.\n", i+1, no);
            return (FALSE);
        }
        i++;
    }
    if (i == ROMAN_MAX) {
        IDX_ERROR2("Roman page number %s too big (max %d digits).\n",
                   no, ROMAN_MAX);
        return(FALSE);
    }
    inp += prev;

    ENTER(inp + page_offset[ROMU]);

    if (IS_COMPOSITOR)
        return (scan_no(&no[i+comp_len], npg, count, &i));
    else
        return (TRUE);
}

/*
 * Enter a single lower-case letter page number: ALPHA_VAL(no[0]) plus
 * page_offset[ALPL].  Only no[0] is examined.  i is set to 1 before
 * IS_COMPOSITOR is evaluated (presumably so the macro looks at no[1] --
 * confirm in scanid.h); if it holds, parsing continues with scan_no()
 * at no[comp_len + 1].
 *
 * Returns TRUE, or the result of the continued scan_no().
 */
static int
scan_alpha_lower(no, npg, count)
char    no[];
short   npg[];
short  *count;
{
    short   i;

    ENTER(ALPHA_VAL(no[0]) + page_offset[ALPL]);

    i = 1;
    if (IS_COMPOSITOR)
        return (scan_no(&no[comp_len+1], npg, count, &i));
    else
        return (TRUE);
}

/*
 * Upper-case counterpart of scan_alpha_lower(), using page_offset[ALPU].
 *
 * Returns TRUE, or the result of the continued scan_no().
 */
static int
scan_alpha_upper(no, npg, count)
char    no[];
short   npg[];
short  *count;
{
    short   i;

    ENTER(ALPHA_VAL(no[0]) + page_offset[ALPU]);

    i = 1;
    if (IS_COMPOSITOR)
        return (scan_no(&no[comp_len+1], npg, count, &i));
    else
        return (TRUE);
}

/*
 * Read the first argument from idx_fp into key[], up to the idx_aclose
 * that balances the idx_aopen already consumed by the caller.  Nested
 * delimiter pairs are copied verbatim.
 *
 * With compress_blanks set, leading blanks are skipped, every run of
 * SPC/TAB is stored as a single SPC, and a trailing SPC is removed.
 *
 * Returns TRUE when key[] holds a terminated argument.  Returns FALSE
 * after reporting an error if the line or the file ends first, or if
 * the argument does not fit into key[]; in the last case the rest of
 * the line is discarded.
 */
static int
scan_arg1()
{
    int     i = 0;
    int     n = 0;              /* delimiter count */
    int     a;

    if (compress_blanks)
        while (((a = GET_CHAR(idx_fp)) == SPC) || (a == TAB));
    else
        a = GET_CHAR(idx_fp);

    while (i < ARGUMENT_MAX) {
        if (a == idx_aopen) {
            /* opening delimiters within the argument list */
            key[i++] = (char) a;
            n++;
        } else if (a == idx_aclose) {
            if (n == 0) {
                /* i > 0 guards the look-behind for an empty argument */
                if (compress_blanks && (i > 0) && (key[i-1] == SPC))
                    key[i-1] = '\0';
                else
                    key[i] = '\0';
                return (TRUE);
            } else {
                key[i++] = (char) a;
                n--;
            }
        } else
            switch (a) {
            case EOF:
                idx_lc++;
                IDX_ERROR("Incomplete first argument (premature EOF).\n", "");
                return (FALSE);
            case LFD:
                idx_lc++;
                IDX_ERROR("Incomplete first argument (premature LFD).\n", "");
                return (FALSE);
            case TAB:
            case SPC:
                /* compress successive SPC's to one SPC */
                if (compress_blanks) {
                    if ((i > 0) && (key[i-1] != SPC) && (key[i-1] != TAB))
                        key[i++] = SPC;
                    break;
                }
                /* FALLTHROUGH: not compressing, store the blank as is */
            default:
                key[i++] = (char) a;
                break;
            }
        a = GET_CHAR(idx_fp);
    }

    /*
     * Skip to end of line -- unless the character already read ended
     * the line (or the file), in which case skipping would swallow the
     * following line.
     */
    if ((a != LFD) && (a != EOF))
        skip_to_eol();
    idx_lc++;
    IDX_ERROR("First argument too long (max %d).\n", ARGUMENT_MAX);
    return (FALSE);
}

/*
 * Read the second argument (the page number) from idx_fp into no[], up
 * to idx_aclose.  Leading blanks are skipped and trailing blanks are
 * ignored; a blank between two non-blank characters is an error.
 *
 * Returns TRUE when no[] holds a terminated argument.  Returns FALSE
 * after reporting an error if the line or the file ends first, if a
 * blank splits the number, or if the argument does not fit into no[].
 */
static int
scan_arg2()
{
    int     i = 0;
    int     a;
    int     hit_blank = FALSE;

    while (((a = GET_CHAR(idx_fp)) == SPC) || (a == TAB));

    while (i < NUMBER_MAX) {
        if (a == idx_aclose) {
            no[i] = '\0';
            return (TRUE);
        } else
            switch (a) {
            case EOF:
                idx_lc++;
                IDX_ERROR("Incomplete second argument (premature EOF).\n", "");
                return (FALSE);
            case LFD:
                idx_lc++;
                IDX_ERROR("Incomplete second argument (premature LFD).\n", "");
                return (FALSE);
            case TAB:
            case SPC:
                hit_blank = TRUE;
                break;
            default:
                if (hit_blank) {
                    skip_to_eol();
                    idx_lc++;
                    IDX_ERROR("Illegal space within numerals in second argument.\n", "");
                    return (FALSE);
                }
                no[i++] = (char) a;
                break;
            }
        a = GET_CHAR(idx_fp);
    }

    /* Skip to end of line, unless the look-ahead already ended it */
    if ((a != LFD) && (a != EOF))
        skip_to_eol();
    idx_lc++;
    IDX_ERROR("Second argument too long (max %d).\n", NUMBER_MAX);
    return (FALSE);
}