/*
 * This file is part of Mable+, a program for checking MAB data for errors.
 *
 * Copyright (C) 2008, 2011-2012 Kooperativer Bibliotheksverbund
 * Berlin-Brandenburg (KOBV) <http://www.kobv.de>,
 * im Konrad-Zuse-Zentrum für Informationstechnik
 * Berlin (ZIB) <http://www.zib.de>, Takustr. 7, D-14195 Berlin-Dahlem
 * Author(s) Jens Schwidder, <schwidder(at)zib.de>,
 *           Pascal-Nicolas Becker, <becker(at)zib.de>
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */
package de.kobv.mable.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parses an ERRORS report and collects summary statistics about it.
 *
 * <p>A report entry is recognised on a line as a run of non-whitespace
 * characters (the dataset ID), followed by {@code ", #"} and a four digit
 * error code. Lines that do not contain such an entry are ignored, and at
 * most the first entry of each line is evaluated.</p>
 *
 * @author Jens Schwidder <schwidder(at)zib.de>
 * TODO extend to represent ERRORS report + marshalling/unmarshalling
 * TODO move to specific package
 */
public class ErrorReportParser {

    /**
     * Matches one report entry: group 1 is the dataset ID, group 2 the
     * four digit error code. Compiled once because the expression never
     * changes between calls.
     */
    private static final Pattern ENTRY_PATTERN =
            Pattern.compile("(\\S+?), #(\\d{4})");

    /**
     * Set of IDs for datasets with errors.
     */
    private final Set<String> datasetsWithErrors = new HashSet<String>();

    /**
     * Set of error codes.
     */
    private final Set<String> errors = new HashSet<String>();

    /**
     * Count of errors in report.
     */
    private int errorsCount;

    /**
     * Parses ERRORS report file.
     *
     * <p>All statistics held by this parser are reset before reading, so
     * after the call every getter describes only the report that was just
     * parsed. The reader is wrapped in a {@link BufferedReader} and read
     * until its end; it is not closed by this method.</p>
     *
     * @param in Reader for data
     * @throws IOException if error occurs reading the data
     */
    public void parse(final Reader in) throws IOException {
        // Reset the complete state, not just the counter; otherwise the
        // unique-ID and unique-code statistics would still contain entries
        // of a previously parsed report while the counter would not.
        errorsCount = 0;
        datasetsWithErrors.clear();
        errors.clear();

        BufferedReader buf = new BufferedReader(in);

        String line;

        while ((line = buf.readLine()) != null) {
            Matcher matcher = ENTRY_PATTERN.matcher(line);
            // find() rather than matches(): the entry may be surrounded by
            // other text on the same line.
            if (matcher.find()) {
                errorsCount++;
                String datasetId = matcher.group(1);
                String errorCode = matcher.group(2);
                processEntry(datasetId, errorCode);
            }
        }

    }

    /**
     * Records a single report entry in the sets of dataset IDs and error
     * codes. The total error count returned by {@link #getErrorsCount()} is
     * not modified by this method.
     *
     * @param satzId ID of the dataset the error belongs to
     * @param errorCode code of the reported error
     */
    public void processEntry(final String satzId, final String errorCode) {
        datasetsWithErrors.add(satzId);
        errors.add(errorCode);
    }

    /**
     * Returns the number of unique dataset IDs in report.
     * @return number of datasets with errors
     */
    public int getDatasetWithErrorsCount() {
        return datasetsWithErrors.size();
    }

    /**
     * Returns the number of unique error codes in report.
     * @return number of different error codes
     */
    public int getDifferentErrorsCount() {
        return errors.size();
    }

    /**
     * Returns the number of errors.
     * @return number of report entries counted by the last call of
     *         {@link #parse(Reader)}
     */
    public int getErrorsCount() {
        return errorsCount;
    }

}
