/*
SDX: Documentary System in XML.
Copyright (C) 2000, 2001, 2002  Ministere de la culture et de la communication (France), AJLSM

Ministere de la culture et de la communication,
Mission de la recherche et de la technologie
3 rue de Valois, 75042 Paris Cedex 01 (France)
mrt@culture.fr, michel.bottin@culture.fr

AJLSM, 17, rue Vital Carles, 33000 Bordeaux (France)
sevigny@ajlsm.com

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
or connect to:
http://www.fsf.org/copyleft/gpl.html
*/
package fr.gouv.culture.sdx.oai;

import fr.gouv.culture.oai.AbstractOAIHarvester;
import fr.gouv.culture.oai.OAIObject;
import fr.gouv.culture.oai.OAIRequest;
import fr.gouv.culture.oai.OAIRequestImpl;
import fr.gouv.culture.sdx.application.Application;
import fr.gouv.culture.sdx.document.Document;
import fr.gouv.culture.sdx.document.IndexableDocument;
import fr.gouv.culture.sdx.document.XMLDocument;
import fr.gouv.culture.sdx.documentbase.*;
import fr.gouv.culture.sdx.exception.SDXException;
import fr.gouv.culture.sdx.exception.SDXExceptionCode;
import fr.gouv.culture.sdx.framework.Framework;
import fr.gouv.culture.sdx.framework.FrameworkImpl;
import fr.gouv.culture.sdx.pipeline.GenericPipeline;
import fr.gouv.culture.sdx.pipeline.Pipeline;
import fr.gouv.culture.sdx.repository.Repository;
import fr.gouv.culture.sdx.utils.ConfigurationUtilities;
import fr.gouv.culture.sdx.utils.Utilities;
import fr.gouv.culture.sdx.utils.database.Database;
import fr.gouv.culture.sdx.utils.database.DatabaseBacked;
import fr.gouv.culture.sdx.utils.database.DatabaseEntity;
import fr.gouv.culture.sdx.utils.database.Property;
import fr.gouv.culture.util.apache.avalon.cornerstone.services.scheduler.*;
import fr.gouv.culture.util.apache.cocoon.xml.SynchronizedXMLConsumerWrapper;
import org.apache.avalon.excalibur.io.FileUtil;
import org.apache.avalon.excalibur.xml.Parser;
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.cocoon.Constants;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.serialization.XMLSerializer;
import org.apache.cocoon.xml.IncludeXMLConsumer;
import org.apache.cocoon.xml.XMLConsumer;
import org.apache.cocoon.xml.XMLMulticaster;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Hashtable;

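/**
 * Base class for OAI harvesters that store the records they harvest in an
 * SDX document base.
 *
 * <p>It is configured from an Avalon <code>Configuration</code>, captures
 * harvested records as temporary files before indexing them, and acts as a
 * scheduler <code>Target</code> so that stored requests can be re-issued
 * when their time triggers fire.</p>
 *
 * Created by IntelliJ IDEA.
 * User: rpandey
 * Date: May 12, 2003
 * Time: 12:32:16 PM
 */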
public abstract class AbstractDocumentBaseOAIHarvester extends AbstractOAIHarvester implements Target {

    /**Name of the configuration child element read by <code>configurePipeline</code>*/
    protected String ELEMENT_NAME_PIPELINE = "pipeline";

    /**The underlying document base, as handed to the constructor (may be null)*/
    protected DocumentBase docbase = null;
    /**Id of the underlying document base; stays empty when no document base was supplied*/
    protected String docbaseId = "";
    /**Properties from the document base, installed through <code>setProperties</code>*/
    protected Hashtable docBaseProps = null;

    /**Pre-indexation pipeline; only created when a pipeline element is configured*/
    protected Pipeline pipe = null;
    /**Underlying database to store any info*/
    protected Database database = null;
    /**Requests in application.xconf, keyed by their request url*/
    protected Hashtable storedRequests = null;
    /**References to the underlying documentbase's/application's repositories, keyed by data provider url*/
    protected Hashtable storeRepositoriesRefs = null;
    /**Time scheduler for stored requests; only created once a trigger has been configured*/
    protected TimeScheduler scheduler = null;
    /**IDGenerator for this object*/
    protected IDGenerator harvesterIdGen = null;

    //variables for sax stream handling
    /**Suffix of the temporary files holding captured records*/
    protected String TEMPFILE_SUFFIX = ".sdx";
    /**Directory in which captured records are written before being indexed*/
    protected File tempDir = null;
    /**File receiving the record currently being captured*/
    protected File harvestDoc = null;
    /**Output stream opened on <code>harvestDoc</code>*/
    protected FileOutputStream fileOs = null;
    /**Document built from a url found in a record, waiting to be included in the output*/
    protected XMLDocument urlResource = null;
    /**Documents marked for deletion during the current harvest*/
    protected ArrayList deletedDocs = null;
    /**Documents read back from <code>tempDir</code> and submitted for indexing*/
    protected ArrayList harvestedDocs = null;

    /**Read from the "keepDeletedRecords" attribute. Defaults to false: "deletedRecords" are
     * deleted, keeping our harvester in sync with the source repository.
     * NOTE(review): the code consuming this flag is outside the visible part of the file.
     */
    protected boolean keepDeletedRecords = false;
    /**Read from the "noRecordsPerBatch" attribute and used as the batch maximum of the index parameters*/
    protected int noRecordsPerBatch = OAIObject.NUMBER_RECORDS_PER_RESPONSE;

    //configuration node names
    /**Configuration element name: container of the data provider definitions*/
    protected static final String ELEMENT_NAME_OAI_DATA_PROVIDERS = "oai-data-providers";
    /**Configuration element name: one verb (request type) of a data provider*/
    protected static final String ELEMENT_NAME_OAI_VERB = "oai-verb";
    /**Configuration element name: one record identifier of a GetRecord verb*/
    protected static final String ELEMENT_NAME_OAI_IDENTIFIER = "oai-identifier";
    /**Configuration attribute name: the name of a verb*/
    protected static final String ATTRIBUTE_NAME_NAME = "name";
    /**Configuration name for admin emails; read both as an attribute and as child elements*/
    protected static final String ATTRIBUTE_NAME_ADMIN_EMAIL = OAIObject.Node.Name.ADMIN_EMAIL;
    /**Configuration attribute name: the user agent of the harvester*/
    protected static final String ATTRIBUTE_NAME_USER_AGENT = "userAgent";
    /**Configuration attribute name: the url of a data provider*/
    protected static final String ATTRIBUTE_NAME_URL = "url";
    /**Configuration name for update settings; despite the constant's name it is read as a child element*/
    protected static final String ATTRIBUTE_NAME_UPDATE = "update";
    /**Configuration attribute name: the metadata prefix of a verb*/
    protected static final String ATTRIBUTE_NAME_METADATA_PREFIX = OAIObject.Node.Name.METADATA_PREFIX;
    /**Configuration attribute name: reference to the SDX repository in which records are stored*/
    protected static final String ATTRIBUTE_NAME_SDX_REPOSITORY = "sdxRepository";
    /**Configuration attribute name: "from" argument of a ListRecords verb*/
    protected static final String ATTRIBUTE_NAME_FROM = "from";
    /**Configuration attribute name: "until" argument of a ListRecords verb*/
    protected static final String ATTRIBUTE_NAME_UNTIL = "until";
    /**Configuration attribute name: "set" argument of a ListRecords verb*/
    protected static final String ATTRIBUTE_NAME_SET = "set";
    /**Configuration attribute name: whether deleted records are kept*/
    protected static final String ATTRIBUTE_NAME_KEEP_DELETED_RECORD = "keepDeletedRecords";
    /**Configuration attribute name: number of records per indexing batch*/
    protected static final String ATTRIBUTE_NO_RECORDS_PER_BATCH = "noRecordsPerBatch";


    //strings for internal database properties and sax output
    protected static final String OAI_HARVEST_ID = "oaiHarvestId";
    //internal database property for failed harvests caused by internal errors (errors in this implementation and not OAI repository errors)
    protected static final String OAI_FAILED_HARVEST = "oaiFailedHarvest";
    protected static final String OAI_HARVESTER_LAST_UPDATED = "oaiHarvesterLastUpdated";
    protected static final String OAI_HARVESTER_RESUMPTION_TOKEN = "oaiHarvesterResumptionToken";
    /*Field names for common request parameters*/
    protected static final String OAI_VERB = "oaiVerb";
    protected static final String OAI_IDENTIFIER = "oaiIdentifier";
    protected static final String OAI_METADATA_PREFIX = "oaiMetadataPrefix";
    protected static final String OAI_FROM = "oaiFrom";
    protected static final String OAI_UNTIL = "oaiUntil";
    protected static final String OAI_SET = "oaiSet";
    protected static final String NO_DOCS_DELETED = "noDocDeleted";
    protected static final String NO_DOCS_HARVESTED = "noDocHarvested";


    /**
     * Builds a harvester bound to a document base.
     *
     * @param base  The document base that will receive harvested records; when
     *              <code>null</code> the document base id keeps its empty default
     */
    public AbstractDocumentBaseOAIHarvester(DocumentBase base) {
        this.docbase = base;
        if (base == null) return;
        this.docbaseId = base.getId();
    }

    /**
     * Installs the property table this harvester reads from.
     *
     * @param props The properties, kept by reference (no copy is made)
     */
    public void setProperties(Hashtable props) {
        docBaseProps = props;
    }


    /**
     * Configures this harvester: simple attributes first, then admin emails,
     * data providers, the pre-indexation pipeline, the internal database and
     * finally the harvest id generator (which needs the database).
     *
     * @param configuration The harvester configuration
     * @throws ConfigurationException if any of the configuration steps fails
     */
    public void configure(Configuration configuration) throws ConfigurationException {
        //each attribute falls back on the value currently held by the object
        final String agent = configuration.getAttribute(ATTRIBUTE_NAME_USER_AGENT, "SDX OAI Harvester");
        super.userAgent = agent;
        final boolean keepDeleted = configuration.getAttributeAsBoolean(ATTRIBUTE_NAME_KEEP_DELETED_RECORD, this.keepDeletedRecords);
        this.keepDeletedRecords = keepDeleted;
        final int batchSize = configuration.getAttributeAsInteger(ATTRIBUTE_NO_RECORDS_PER_BATCH, this.noRecordsPerBatch);
        this.noRecordsPerBatch = batchSize;

        this.configureAdminEmails(configuration);
        this.configureDataProviders(configuration);
        this.configurePipeline(configuration);
        this.configureDatabase(configuration);
        this.configureHarvestIDGenerator(configuration);
    }


    /**
     * Sets up the internal database of this harvester through a
     * <code>DatabaseBacked</code> helper identified by the harvester id.
     *
     * @param configuration The harvester configuration
     * @throws ConfigurationException if the helper cannot be composed, configured or initialised
     */
    protected void configureDatabase(Configuration configuration) throws ConfigurationException {
        final DatabaseBacked backing = new DatabaseBacked();
        try {
            backing.setId(this.getHarvesterId());
            backing.compose(this.manager);
            backing.setProperties(this.docBaseProps);
            backing.configure(configuration);
            backing.init();
            //only a fully initialised helper hands over its database
            this.database = backing.getDatabase();
        } catch (SDXException sdxFailure) {
            throw new ConfigurationException(sdxFailure.getMessage(), sdxFailure);
        } catch (ComponentException componentFailure) {
            throw new ConfigurationException(componentFailure.getMessage(), componentFailure);
        }
    }

    /**
     * Creates the id generator used for harvests and attaches the internal
     * database to it; must therefore run after <code>configureDatabase</code>.
     *
     * @param configuration The harvester configuration
     * @throws ConfigurationException if the generator cannot be configured
     */
    protected void configureHarvestIDGenerator(Configuration configuration) throws ConfigurationException {
        IDGenerator generator = ConfigurationUtilities.configureIDGenerator(this.logger, configuration);
        this.harvesterIdGen = generator;
        generator.setDatabase(this.database);
    }


    /**
     * Builds the id of this harvester from the SDX namespace prefix, the OAI
     * prefix, the literal "harvester" and the document base id, joined by
     * underscores.
     *
     * @return The harvester id
     */
    protected String getHarvesterId() {
        StringBuffer id = new StringBuffer();
        id.append(Framework.SDXNamespacePrefix).append("_");
        id.append(OAIObject.Node.Prefix.OAI).append("_");
        id.append("harvester").append("_");
        id.append(this.docbaseId);
        return id.toString();
    }

    /**
     * Collects the admin emails of the harvester. They may be given as a single
     * attribute, as child elements, or both; the attribute value, when present,
     * comes first in the resulting array.
     *
     * @param configuration The harvester configuration
     * @throws ConfigurationException if a child element has no value, or
     *          (presumably, through <code>Utilities.checkConfAttributeValue</code>)
     *          when no email at all is configured
     */
    protected void configureAdminEmails(Configuration configuration) throws ConfigurationException {
        ArrayList collected = new ArrayList();

        String attributeEmail = configuration.getAttribute(ATTRIBUTE_NAME_ADMIN_EMAIL, null);
        Configuration[] emailElements = configuration.getChildren(ATTRIBUTE_NAME_ADMIN_EMAIL);
        if (Utilities.checkString(attributeEmail))
            collected.add(attributeEmail);

        int idx = 0;
        while (idx < emailElements.length) {
            Configuration emailElement = emailElements[idx++];
            if (emailElement == null) continue;
            String email = emailElement.getValue();
            if (Utilities.checkString(email))
                collected.add(email);
        }

        //no admin email at all: let the attribute check report the problem TODO:make this exception better
        if (collected.isEmpty())
            Utilities.checkConfAttributeValue(ATTRIBUTE_NAME_ADMIN_EMAIL, null, configuration.getLocation());

        String[] emails = new String[collected.size()];
        collected.toArray(emails);
        super.adminEmails = emails;
    }

    /**
     * Reads the data provider definitions and turns every configured verb into
     * a stored request, keyed by its request url. A GetRecord verb yields one
     * request per identifier child element, a ListRecords verb yields a single
     * request; any other verb is rejected. Update triggers are registered for
     * each request, and the scheduler (if one was created) is started at the end.
     *
     * @param configuration The harvester configuration (nothing happens when null)
     * @throws ConfigurationException if a required attribute or value is missing,
     *          or if a verb other than GetRecord/ListRecords is configured
     * @see #storedRequests
     */
    protected void configureDataProviders(Configuration configuration) throws ConfigurationException {
        if (configuration == null) return;
        Configuration providersConf = configuration.getChild(ELEMENT_NAME_OAI_DATA_PROVIDERS, false);
        if (providersConf == null) return;
        Configuration[] providerConfs = providersConf.getChildren(AbstractDocumentBase.ELEMENT_NAME_OAI_REPOSITORY);
        if (providerConfs == null) return;

        for (int p = 0; p < providerConfs.length; p++) {
            Configuration providerConf = providerConfs[p];
            if (providerConf == null) continue;

            if (storedRequests == null) storedRequests = new Hashtable();
            String repoUrl = providerConf.getAttribute(ATTRIBUTE_NAME_URL);
            configureStoreRepositories(repoUrl, providerConf);
            Configuration updateConf = providerConf.getChild(ATTRIBUTE_NAME_UPDATE, false);
            Configuration[] verbConfs = providerConf.getChildren(ELEMENT_NAME_OAI_VERB);
            if (verbConfs == null) continue;

            for (int v = 0; v < verbConfs.length; v++) {
                Configuration verbConf = verbConfs[v];
                if (verbConf == null) continue;

                String verbName = verbConf.getAttribute(ATTRIBUTE_NAME_NAME);
                String mdPrefix = verbConf.getAttribute(ATTRIBUTE_NAME_METADATA_PREFIX);
                Utilities.checkConfAttributeValue(ATTRIBUTE_NAME_METADATA_PREFIX, mdPrefix, verbConf.getLocation());

                if (verbName.equalsIgnoreCase(OAIRequest.VERB_STRING_GET_RECORD)) {
                    //one stored request per configured identifier
                    Configuration[] idConfs = verbConf.getChildren(ELEMENT_NAME_OAI_IDENTIFIER);
                    if (idConfs == null) continue;
                    for (int k = 0; k < idConfs.length; k++) {
                        Configuration idConf = idConfs[k];
                        if (idConf == null) continue;
                        String identifier = idConf.getValue();
                        OAIRequest getRecord = new OAIRequestImpl();
                        getRecord.enableLogging(this.logger);
                        getRecord.setRepositoryURL(repoUrl);
                        getRecord.setVerbString(OAIRequest.VERB_STRING_GET_RECORD);
                        getRecord.setMetadataPrefix(mdPrefix);
                        getRecord.setIdentifier(identifier);
                        storedRequests.put(getRecord.getRequestURL(), getRecord);
                        configureUpdateTriggers(getRecord.getRequestURL(), updateConf);
                    }
                } else if (verbName.equalsIgnoreCase(OAIRequest.VERB_STRING_LIST_RECORDS)) {
                    //a single stored request carrying the optional selective harvesting arguments
                    String from = verbConf.getAttribute(ATTRIBUTE_NAME_FROM, null);
                    String until = verbConf.getAttribute(ATTRIBUTE_NAME_UNTIL, null);
                    String set = verbConf.getAttribute(ATTRIBUTE_NAME_SET, null);
                    OAIRequest listRecords = new OAIRequestImpl();
                    listRecords.enableLogging(this.logger);
                    listRecords.setRepositoryURL(repoUrl);
                    listRecords.setVerbString(OAIRequest.VERB_STRING_LIST_RECORDS);
                    listRecords.setMetadataPrefix(mdPrefix);
                    listRecords.setFrom(from);
                    listRecords.setUntil(until);
                    listRecords.setSetIdentifier(set);
                    this.storedRequests.put(listRecords.getRequestURL(), listRecords);
                    configureUpdateTriggers(listRecords.getRequestURL(), updateConf);
                } else {
                    //TODOException:
                    throw new ConfigurationException("this verb action is not supported for harvesting : " + verbName);
                }
            }
        }

        //triggers, if any, only start firing once every provider has been read
        if (this.scheduler != null) this.scheduler.start();
    }

    /**
     * Registers a time trigger for a stored request, creating the scheduler on
     * first use. Nothing happens when the request url is not a usable string,
     * when there is no update configuration, or when no trigger can be built from it.
     *
     * @param requestUrl    The request url, also used as the trigger name
     * @param updateConf    The configuration for updates
     * @throws ConfigurationException declared for the configuration handling;
     *          NOTE(review): its exact source is not visible in this part of the file
     * @see #scheduler
     * @see #storedRequests
     */
    protected void configureUpdateTriggers(String requestUrl, Configuration updateConf) throws ConfigurationException {
        if (!Utilities.checkString(requestUrl)) return;
        if (updateConf == null) return;

        TimeTrigger timeTrigger = new TimeTriggerFactory().createTimeTrigger(updateConf);
        if (timeTrigger == null) return;

        if (this.scheduler == null)
            this.scheduler = new SimpleTimeScheduler();
        this.scheduler.addTrigger(requestUrl, timeTrigger, this);
    }


    /**
     * Resolves the SDX repository referenced by a data provider configuration
     * and remembers it under the provider url, so that records harvested from
     * that provider are stored in it.
     *
     * <p>The reference is looked up first among the application repositories
     * found in the document base properties, then in the document base itself.
     * Nothing is registered when the url is not a usable string or when the
     * configuration carries no repository reference.</p>
     *
     * @param repoUrl   The repository/data provider url
     * @param oaiRepoConf The configuration
     * @throws ConfigurationException if a repository is referenced but cannot be resolved
     */
    protected void configureStoreRepositories(String repoUrl, Configuration oaiRepoConf) throws ConfigurationException {
        if (!Utilities.checkString(repoUrl)) return;

        /*check for the sdxrepository attribute, if it exists, get the repository object and add it to the local hashtable*/
        String ref = oaiRepoConf.getAttribute(ATTRIBUTE_NAME_SDX_REPOSITORY, null);
        if (!Utilities.checkString(ref)) return;

        Repository repo = null;
        SDXException lookupFailure = null;

        if (this.docBaseProps != null) {
            Hashtable appRepos = (Hashtable) this.docBaseProps.get(Application.APPLICATION_REPOSITORIES);
            if (appRepos != null)
                repo = (Repository) appRepos.get(ref);
        }

        if (repo == null && this.docbase != null) {
            try {
                repo = this.docbase.getRepository(ref);
            } catch (SDXException e) {
                //kept so that the original cause is not lost when reporting below
                lookupFailure = e;
            }
        }

        //an unresolved reference is a configuration error; a Hashtable cannot hold a null value anyway
        if (repo == null) {
            String[] args = new String[1];
            args[0] = ref;
            SDXException sdxE = new SDXException(logger, SDXExceptionCode.ERROR_LOAD_REFERENCED_REPO, args, lookupFailure);
            throw new ConfigurationException(sdxE.getMessage(), sdxE);
        }

        if (this.storeRepositoriesRefs == null) this.storeRepositoriesRefs = new Hashtable();
        //populating the hashtable
        this.storeRepositoriesRefs.put(repoUrl, repo);
    }


    /**
     * Builds the pre-indexation pipeline when the configuration holds a
     * pipeline child element; otherwise the <code>pipe</code> field is left untouched.
     *
     * @param configuration The harvester configuration
     * @throws ConfigurationException if the pipeline cannot be composed or configured
     * @see #pipe
     */
    protected void configurePipeline(Configuration configuration) throws ConfigurationException {
        Configuration pipelineConf = configuration.getChild(ELEMENT_NAME_PIPELINE, false);
        if (pipelineConf == null) return;//no pipeline element, nothing to build

        Pipeline pipeline = new GenericPipeline();
        this.pipe = pipeline;
        pipeline.enableLogging(this.logger);
        try {
            //the pipeline needs the cocoon component manager
            pipeline.compose(this.manager);
        } catch (ComponentException composeFailure) {
            throw new ConfigurationException(composeFailure.getMessage(), composeFailure);
        }
        //settings come from 'application.xconf'
        pipeline.configure(pipeConfOrSelf(pipelineConf));
    }

    /**Returns the configuration it is given; keeps the configure call above on one readable line*/
    private Configuration pipeConfOrSelf(Configuration pipelineConf) {
        return pipelineConf;
    }

    /**
     * Creates a new temporary directory in which harvested records are written
     * before they are indexed.
     *
     * <p>The directory lives under the upload directory found in the document
     * base properties when that directory is writable, and under the system
     * temporary directory otherwise (also when no upload directory is set).
     * Its name is made of the application path, the document base id, the
     * current date and, when one is pending, the resumption token.</p>
     *
     * <p>The resumption token is supplied by the remote repository, so it is
     * url-encoded like the date before being used in a file name: a raw token
     * may contain path separators or other characters that are illegal or
     * dangerous in a path.</p>
     *
     * @return The directory, checked through <code>Utilities.checkDirectory</code>
     * @throws SDXException
     * @throws IOException if the canonical path of the directory cannot be resolved
     */
    protected File getNewTempDir() throws SDXException, IOException {
        File uploadDir = (File) this.docBaseProps.get(Constants.CONTEXT_UPLOAD_DIR);
        String childDir = Utilities.getStringFromHashtable(FrameworkImpl.APP_PATH_NAME, this.docBaseProps) + "_oaiHarvests" + File.separator + this.docbaseId + File.separator + "harvest-" + Utilities.encodeURL(getIsoDate(), null);
        if (Utilities.checkString(super.resumptionToken))
            childDir += "-" + OAIObject.Node.Name.RESUMPTION_TOKEN + "-" + Utilities.encodeURL(super.resumptionToken, null);

        File ret;
        if (uploadDir != null && uploadDir.canWrite())
            ret = new File(uploadDir, childDir);
        else
            ret = new File(Utilities.getSystemTempDir(), childDir);
        Utilities.checkDirectory(ret.getCanonicalPath(), logger);
        return ret;
    }

    /**
     * Deletes the directory held by the <code>tempDir</code> field, if any.
     * A failure is only logged as a warning; the field itself is left as it is.
     */
    protected void deleteTempDir() {
        if (this.tempDir == null) return;
        try {
            //deleting the old tempdir if possible
            FileUtil.deleteDirectory(this.tempDir);
        } catch (IOException deleteFailure) {
            Utilities.logWarn(logger, deleteFailure.getMessage(), deleteFailure);
        }
    }

    /**
     * Points the <code>tempDir</code> field at a freshly created temporary directory.
     *
     * @throws SDXException
     * @throws IOException
     */
    protected void initTempDir() throws SDXException, IOException {
        File freshDir = this.getNewTempDir();
        this.tempDir = freshDir;
    }

    /**
     * Gets a date as a string by formatting the value returned by the SDX
     * <code>Date.getUtcIso8601Date()</code> helper with <code>Date.formatDate</code>.
     * NOTE(review): by the helper's name this is the current UTC date in
     * iso8601 format; the helper itself is outside this file.
     *
     * @return The formatted date
     */
    protected String getIsoDate() {
        return fr.gouv.culture.sdx.utils.Date.formatDate(fr.gouv.culture.sdx.utils.Date.getUtcIso8601Date());
    }


    /**
     * Sets up the resources needed to capture one oai record: a temporary file
     * in <code>tempDir</code> (created first if missing), a stream on that file
     * and a serializer writing to it. The serializer becomes the synchronized
     * consumer, together with the first external consumer when there is one.
     *
     * @throws SAXException wrapping any I/O or SDX failure met while preparing
     */
    protected void prepareRecordCapture() throws SAXException {
        try {
            boolean tempDirReady = this.tempDir != null && this.tempDir.exists();
            if (!tempDirReady) initTempDir();

            this.harvestDoc = File.createTempFile("oaiHarvestedRecord", TEMPFILE_SUFFIX, tempDir);
            this.fileOs = new FileOutputStream(this.harvestDoc);

            XMLSerializer serializer = new XMLSerializer();
            serializer.setOutputStream(this.fileOs);

            //events go to the file and, when an external consumer exists, to it as well
            XMLConsumer target = serializer;
            if (firstXmlConsumer != null)
                target = new XMLMulticaster(serializer, firstXmlConsumer);

            super.setSynchronizedConsumer(new SynchronizedXMLConsumerWrapper(target));
        } catch (IOException ioFailure) {
            throw new SAXException(ioFailure.getMessage(), ioFailure);
        } catch (SDXException sdxFailure) {
            throw new SAXException(sdxFailure.getMessage(), sdxFailure);
        }
    }

    /**
     * Ends the capture of an oai record: closes the stream and renames the
     * captured file so that its name is the url-encoded oai identifier followed
     * by the temporary file suffix. The name is later decoded to recover the
     * document id, so a record left under its generated temporary name would be
     * indexed with a wrong id.
     * TODO: is this a bad idea, id's could be long?
     *
     * <p>If a file for the same identifier already exists in the directory it
     * is replaced, so the most recently captured version wins on every
     * platform. If the rename still fails, the captured file is discarded and
     * an <code>IOException</code> is thrown.</p>
     *
     * <p>Nothing happens when no capture is in progress.</p>
     *
     * @throws Exception if the stream cannot be closed or the file cannot be renamed
     */
    protected void captureRecord() throws Exception {
        if (this.fileOs == null || this.harvestDoc == null) return;

        this.fileOs.flush();
        this.fileOs.close();

        String encoding = Utilities.getStringFromHashtable(FrameworkImpl.ATTRIBUTE_NAME_ENCODING, this.docBaseProps);
        File newHarvestDoc = new File(tempDir, Utilities.encodeURL(super.currentOaiIdentifier, encoding) + TEMPFILE_SUFFIX);

        //File.renameTo may refuse to overwrite an existing file depending on the platform
        if (newHarvestDoc.exists() && !newHarvestDoc.equals(this.harvestDoc))
            newHarvestDoc.delete();

        if (!this.harvestDoc.renameTo(newHarvestDoc)) {
            String failedPath = this.harvestDoc.getPath();
            //drop the misnamed file so that it is never indexed under a bogus id
            resetRecordCaptureFields(true);
            throw new IOException("Unable to rename the harvested record file \"" + failedPath + "\" to \"" + newHarvestDoc.getPath() + "\"");
        }

        resetRecordCaptureFields(false);
    }

    /**
     * Resets the class fields used for record capture, possibly deleting the
     * file behind the current <code>harvestDoc</code>, and puts the first
     * external consumer back in place.
     *
     * <p>The stream is always closed and its field always cleared, even when
     * flushing fails; failures are logged and never propagated.</p>
     *
     * @param deleteDoc flag for deletion of actual file
     */
    protected void resetRecordCaptureFields(boolean deleteDoc) {
        FileOutputStream out = this.fileOs;
        this.fileOs = null;
        if (out != null) {
            try {
                out.flush();
            } catch (IOException e) {
                Utilities.logException(logger, e);
            }
            //closing separately so that a failed flush cannot leave the stream open
            try {
                out.close();
            } catch (IOException e) {
                Utilities.logException(logger, e);
            }
        }

        if (this.harvestDoc != null) {
            if (deleteDoc)
                this.harvestDoc.delete();
            this.harvestDoc = null;
        }

        super.setConsumer(super.firstXmlConsumer);//resetting the consumer to the first external consumer
    }


    /**
     * Marks the record identified by the current oai identifier for deletion
     * by adding a document carrying that id to <code>deletedDocs</code>
     * (created on first use).
     *
     * <p>When the document cannot be built the failure is logged and the
     * record is not marked; the harvest itself goes on.</p>
     */
    protected void prepareRecordForDeletion() {
        if (this.deletedDocs == null) this.deletedDocs = new ArrayList();
        if (!Utilities.checkString(this.currentOaiIdentifier)) return;

        try {
            this.deletedDocs.add(new XMLDocument(this.currentOaiIdentifier));
        } catch (SDXException e) {
            //a lost deletion leaves the document base out of sync with the source, so leave a trace
            Utilities.logException(logger, e);
        }
    }


    /**
     * Carries out the marked deletions, then reads the captured records from
     * <code>tempDir</code> and indexes them in the document base.
     *
     * <p>Records are indexed in batches: a batch is sent as soon as it holds
     * the batch maximum of the index parameters, and whatever remains is sent
     * after the last file. A batch maximum of zero or less results in one
     * document per batch.</p>
     *
     * <p>Files whose name does not contain the temporary file suffix are
     * ignored, since no document id can be derived from them. A file whose url
     * cannot be built is logged and skipped.</p>
     *
     * @return <code>true</code> if at least one deletion or indexation request
     *          was sent to the document base
     * @throws SDXException
     * @throws SAXException
     * @throws ProcessingException
     */
    protected boolean storeHarvestedData() throws SDXException, SAXException, ProcessingException {
        if (docbase == null) return false;
        boolean dataHarvested = false;

        //deleting any docs which have been removed from the repo
        if (this.deletedDocs != null && this.deletedDocs.size() > 0) {
            Document[] deleteDocs = (Document[]) this.deletedDocs.toArray(new Document[0]);
            this.docbase.delete(deleteDocs, /*this.contentHandler*/this);
            dataHarvested = true;
        }

        if (this.tempDir == null) return dataHarvested;

        //creating our docs from the disk
        String[] files = this.tempDir.list();
        this.harvestedDocs = new ArrayList();
        //list() yields null when the directory is gone or unreadable
        if (files == null) return dataHarvested;

        IndexParameters indexParams = getIndexParameters();
        int batchMax = indexParams.getBatchMax();
        String encoding = Utilities.getStringFromHashtable(FrameworkImpl.ATTRIBUTE_NAME_ENCODING, this.docBaseProps);
        ArrayList docs = new ArrayList();

        for (int i = 0; i < files.length; i++) {
            String fileName = files[i];
            int suffixAt = fileName.lastIndexOf(TEMPFILE_SUFFIX);
            if (suffixAt < 0) continue;//not one of our captured records

            XMLDocument metadataDoc = new XMLDocument();
            String id = fileName.substring(0, suffixAt);
            if (Utilities.checkString(id))
                metadataDoc.setId(Utilities.decodeURL(id, encoding));

            try {
                metadataDoc.setContent(new File(tempDir, fileName).toURL());
                docs.add(metadataDoc);
                this.harvestedDocs.add(metadataDoc);//keeping track of all additions
            } catch (MalformedURLException e) {
                Utilities.logException(logger, e);
            }

            //a full batch goes to the base right away
            if (docs.size() > 0 && docs.size() >= batchMax) {
                indexHarvestedBatch(docs, indexParams);
                dataHarvested = true;
                docs.clear();
            }
        }

        //whatever is left after the last file
        if (docs.size() > 0) {
            indexHarvestedBatch(docs, indexParams);
            dataHarvested = true;
        }

        return dataHarvested;
    }

    /**Indexes one batch of harvested documents, in the repository registered for the current data provider url if there is one*/
    private void indexHarvestedBatch(ArrayList docs, IndexParameters indexParams) throws SDXException, SAXException, ProcessingException {
        IndexableDocument[] indexDocs = (IndexableDocument[]) docs.toArray(new IndexableDocument[0]);
        Repository repo = null;
        if (Utilities.checkString(this.repoUrl) && storeRepositoriesRefs != null)
            repo = (Repository) this.storeRepositoriesRefs.get(this.repoUrl);
        this.docbase.index(indexDocs, repo, indexParams, /*this.contentHandler*/this);
    }

    /**
     * Follows a resumption token: builds a new request url from the repository
     * url, the verb of the request that produced the token and the token
     * itself, then issues that request. Nothing happens without a token, a
     * repository url or a verb.
     *
     * <p>The order matters: the url is built and the temporary directory is
     * re-created while the token is still set (the directory name includes
     * it), and the token is reset before the new request is sent so that the
     * same token is not followed again.</p>
     */
    protected void handleResumptionToken() {
        if (!Utilities.checkString(super.resumptionToken)) return;
        if (!Utilities.checkString(super.repoUrl)) return;

        String verb = this.requestParams.getParameter(OAIObject.Node.Name.VERB, "");
        if (!Utilities.checkString(verb)) return;

        String verbArgument = OAIObject.Node.Name.VERB + OAIRequest.URL_CHARACTER_EQUALS + verb;
        String tokenArgument = OAIObject.Node.Name.RESUMPTION_TOKEN + OAIRequest.URL_CHARACTER_EQUALS + super.resumptionToken;
        String requestUrl = this.repoUrl + OAIRequest.URL_CHARACTER_QUESTION_MARK + verbArgument + OAIRequest.URL_CHARACTER_AMPERSAND + tokenArgument;

        this.resetAllFields();
        try {
            //trying to build a tempDir with the resumptionToken in the dir name
            this.initTempDir();
        } catch (SDXException sdxFailure) {
            //only logged, as there will be another opportunity during the harvest
            Utilities.logException(logger, sdxFailure);
        } catch (IOException ioFailure) {
            Utilities.logException(logger, ioFailure);
        }

        super.resetResumptionToken();//resetting the resumption token here so we don't loop
        this.receiveRequest(requestUrl);
    }

    //TODO-TEST: this should be preparing for a merge with the metadata, done
    /**
     * Prepares the retrieval of the XML found behind a url read from an oai
     * record: a document with that url as id and content is built and kept in
     * <code>urlResource</code>. A malformed url or an SDX failure is logged
     * and leaves <code>urlResource</code> unchanged.
     *
     * @see #urlResource
     */
    protected void prepareResourceFromUrlIdentifierCapture() {
        final String urlIdentifier = this.currentMetadtaUrlIdentifier;
        if (!Utilities.checkString(urlIdentifier)) return;

        try {
            URL location = new URL(urlIdentifier);
            XMLDocument remoteDoc = new XMLDocument();
            remoteDoc.setId(urlIdentifier);
            remoteDoc.setContent(location);
            //only a completely prepared document is exposed
            this.urlResource = remoteDoc;
        } catch (MalformedURLException badUrl) {
            Utilities.logException(logger, badUrl);
        } catch (SDXException sdxFailure) {
            Utilities.logException(logger, sdxFailure);
        }
    }

    /**
     * Sends the XML of the pending <code>urlResource</code> to the synchronized
     * consumer, wrapped in an <code>sdx:urlResource</code> element. The pending
     * resource is cleared and the wrapping element closed whether or not the
     * parsing succeeds; failures are logged.
     */
    protected void captureResourceFromUrlIdentifier() {
        if (this.urlResource == null) return;

        final String localName = "urlResource";
        final String qualifiedName = "sdx:urlResource";
        try {
            IncludeXMLConsumer wrapper = new IncludeXMLConsumer(super.synchronizedXmlConsumer);
            urlResource.setConsumer(wrapper);
            wrapper.startElement(Framework.SDXNamespaceURI, localName, qualifiedName, new AttributesImpl());
            Parser xmlParser = null;
            try {
                xmlParser = (Parser) this.manager.lookup(Parser.ROLE);
                urlResource.parse(xmlParser);
            } finally {
                if (xmlParser != null)
                    this.manager.release(xmlParser);
                this.urlResource = null;
                wrapper.endElement(Framework.SDXNamespaceURI, localName, qualifiedName);
            }
        } catch (SAXException saxFailure) {
            Utilities.logException(logger, saxFailure);
        } catch (ComponentException componentFailure) {
            Utilities.logException(logger, componentFailure);
        } catch (SDXException sdxFailure) {
            Utilities.logException(logger, sdxFailure);
        }
    }


    /**
     * Puts the harvester back in its idle state: forgets the pending resource
     * and the document lists, releases the record capture resources without
     * deleting the current file, then removes the temporary directory.
     */
    protected void resetAllFields() {
        this.urlResource = null;
        this.harvestedDocs = null;
        this.deletedDocs = null;

        //the capture stream is released before its directory is removed
        this.resetRecordCaptureFields(false);
        this.deleteTempDir();
        this.tempDir = null;
    }


    /**
     * Builds the index parameters used to index oai records into the
     * underlying document base: all indexation events are sent, the batch
     * maximum is <code>noRecordsPerBatch</code>, and the harvest parameters
     * are given to the pipeline. When both a pre-indexation pipeline and a
     * document base exist, the pipeline is connected to the indexation
     * pipeline of the base and used for indexing.
     *
     * @return The index parameters
     */
    protected IndexParameters getIndexParameters() {
        IndexParameters indexParams = new IndexParameters();
        indexParams.setSendIndexationEvents(IndexParameters.SEND_ALL_EVENTS);
        indexParams.setBatchMax(this.noRecordsPerBatch);

        boolean usePipeline = this.pipe != null && this.docbase != null;
        if (usePipeline) {
            this.pipe.setConsumer(this.docbase.getIndexationPipeline());
            indexParams.setPipeline(this.pipe);
        }

        indexParams.setPipelineParams(super.getHarvestParameters());
        return indexParams;
    }

    /**
     * Sends the details of the stored harvesting requests to the current
     * consumer, wrapped in a stored-harvest-requests element of the SDX
     * namespace. Nothing is sent, and nothing is acquired, when there are no
     * stored requests.
     *
     * <p>The harvester and its synchronized consumer are released only if
     * they were actually acquired by this call. If the thread is interrupted
     * while acquiring, its interrupt status is restored before the failure is
     * reported.</p>
     *
     * @throws SAXException if the output fails or the thread is interrupted while acquiring
     */
    public void sendStoredHarvestingRequests() throws SAXException {
        if (this.storedRequests == null || this.storedRequests.size() <= 0) return;

        final String elementName = fr.gouv.culture.sdx.utils.constants.Node.Name.STORED_HARVEST_REQUESTS;
        final String qualifiedName = Framework.SDXNamespacePrefix + ":" + elementName;

        boolean harvesterAcquired = false;
        boolean consumerAcquired = false;
        try {
            this.acquire();
            harvesterAcquired = true;
            super.acquireSynchronizedXMLConsumer();
            consumerAcquired = true;

            //SAX requires an attributes object, empty when there are no attributes
            super.startElement(Framework.SDXNamespaceURI, elementName, qualifiedName, new AttributesImpl());
            Enumeration requests = storedRequests.elements();
            if (requests != null) {
                while (requests.hasMoreElements()) {
                    OAIRequest request = (OAIRequest) requests.nextElement();
                    if (request != null)
                        request.toSAX(this);
                }
            }
            super.endElement(Framework.SDXNamespaceURI, elementName, qualifiedName);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new SAXException(e.getMessage(), e);
        } finally {
            //releasing only what this call acquired, in reverse order
            if (consumerAcquired) super.releaseSynchronizedXMLConsumer();
            if (harvesterAcquired) this.release();
        }
    }


    /**
     * Scheduler callback: issues the stored request registered under the
     * trigger name (which is also the request url). An unknown trigger name
     * is ignored.
     *
     * @param triggerName The name of the trigger that fired
     */
    public synchronized void targetTriggered(String triggerName) {
        //verify that we have this trigger defined
        OAIRequest stored = (OAIRequest) this.storedRequests.get(triggerName);
        if (stored == null) return;

        /*TODO: query datastructs and get last update time for this url so
        *that we harvest only what may be changed and add this time to the request
        *object
        *TODO:? but should allow force complete re-harvest?
        */
        this.receiveSynchronizedRequest(stored.getRequestURL());
    }


    /**Watches for the start of the element that carries the metadata identifier,
     * then forwards the event to the parent implementation.
     *
     * <p>When <code>identifierName</code> is set, an element is selected if it has an attribute
     * named by <code>Node.Name.NAME</code> whose value equals <code>identifierName</code>, e.g.
     * <code>&lt;anyElementName name="identifierName"&gt;myIdentifierValue&lt;/anyElementName&gt;</code>.
     * In that case <code>captureElemContent</code> is switched on. Only the attribute form is
     * tested in this method.
     *
     * @param s             the namespace URI
     * @param s1            the local name
     * @param s2            the qualified name
     * @param attributes    the attributes of the element, may be <code>null</code>
     * @throws SAXException if the parent implementation fails
     */
    public void startElement(String s, String s1, String s2, Attributes attributes) throws SAXException {
        final boolean identifierConfigured = Utilities.checkString(this.identifierName);
        if (identifierConfigured && attributes != null) {
            final String nameAttribute = attributes.getValue(fr.gouv.culture.sdx.utils.constants.Node.Name.NAME);
            if (this.identifierName.equals(nameAttribute)) {
                this.captureElemContent = true;
            }
        }
        super.startElement(s, s1, s2, attributes);
    }

    /**Handles the end of an element, then forwards the event to the parent implementation.
     *
     * <p>If character content has been buffered in <code>sBuff</code>, the element is not in the
     * OAI 2.0 namespace and its local name is <code>Node.Name.FIELD</code>, the buffered text
     * becomes the current metadata url identifier and
     * <code>prepareResourceFromUrlIdentifierCapture()</code> is called. Any exception raised
     * by that call is logged and otherwise ignored, so processing continues.
     *
     * @param s             the namespace URI
     * @param s1            the local name
     * @param s2            the qualified name
     * @throws SAXException if the parent implementation fails
     */
    public void endElement(String s, String s1, String s2) throws SAXException {
        final boolean hasBufferedText = super.sBuff != null && super.sBuff.length() > 0;
        if (hasBufferedText
                && !OAIObject.Node.Xmlns.OAI_2_0.equals(s)
                && fr.gouv.culture.sdx.utils.constants.Node.Name.FIELD.equals(s1)) {
            super.currentMetadtaUrlIdentifier = super.sBuff.toString();
            try {
                prepareResourceFromUrlIdentifierCapture();
            } catch (Exception buildFailure) {
                //a document that cannot be built must not stop the whole harvest: log and carry on
                Utilities.logException(logger, buildFailure);
            }
        }
        super.endElement(s, s1, s2);
    }

    /**Queries the underlying data structures,
     * based upon the current position in the sax flow (the class fields set so far),
     * and determines whether the current oai record should be harvested.
     *
     * <p>The record is harvested when:
     * <ul>
     * <li>no request parameters are available, or no previous harvest is recorded for the
     * current repository url and verb;</li>
     * <li>the current oai identifier is set and no document with that identifier exists in
     * the document base;</li>
     * <li>the current datestamp is missing or cannot be parsed;</li>
     * <li>no recorded harvest has a last-updated time at or after the record datestamp.</li>
     * </ul>
     * The record is skipped only when at least one recorded harvest is at least as recent
     * as the record datestamp. Unlike a comparison against whichever harvest the search
     * returns first, this does not depend on the order of the search results.
     *
     * <p>Any exception raised while deciding is logged and the record is harvested.
     *
     * @return boolean indicates whether the record should be handled
     */
    protected boolean shouldHarvestDocument() {
        try {
            //without request parameters there is nothing to compare against
            if (super.requestParams == null)
                return true;

            String verb = super.requestParams.getParameter(OAIRequest.URL_PARAM_NAME_VERB, null);
            Parameters params = new Parameters();
            params.setParameter(LuceneDocumentBaseOAIHarvester.OAI_REPOSITORY_URL, super.repoUrl);
            params.setParameter(LuceneDocumentBaseOAIHarvester.OAI_VERB, verb);
            String[] dbes = this.database.search(params);
            if (dbes == null || dbes.length == 0)
                return true;//no harvest executed as yet

            //first comparison: we make a query to see if a document with this identifier exists
            //(this does not depend on the harvest entries, so it is done once, before the loop)
            if (Utilities.checkString(super.currentOaiIdentifier)) {
                DatabaseEntity docDbe = ((SDXDocumentBase) this.docbase).getDatabase().getEntity(super.currentOaiIdentifier);
                if (docDbe == null)
                    return true;//no document with this id exists, so we should harvest it
            }

            //second comparison: the datestamp of the current record vs. the times of the past harvests
            if (!Utilities.checkString(super.currentDatestamp))
                return true;//no datestamp, we cannot prove that the record is up to date
            java.util.Date currentDocumentDate = fr.gouv.culture.sdx.utils.Date.parseDate(super.currentDatestamp);
            if (currentDocumentDate == null)
                return true;//unusable datestamp, same reasoning

            for (int i = 0; i < dbes.length; i++) {
                DatabaseEntity dbe = this.database.getEntity(dbes[i]);
                if (dbe == null)
                    continue;//entry vanished since the search, it proves nothing
                String formattedDate = dbe.getProperty(LuceneDocumentBaseOAIHarvester.OAI_HARVESTER_LAST_UPDATED);
                if (!Utilities.checkString(formattedDate))
                    continue;//harvest stored without a response date
                java.util.Date lastUpdatedDate = fr.gouv.culture.sdx.utils.Date.parseDate(formattedDate);
                if (lastUpdatedDate == null)
                    continue;

                /*the document exists and a harvest at least as recent as its datestamp
                *has been recorded: it has not changed since then
                */
                if (currentDocumentDate.getTime() <= lastUpdatedDate.getTime())
                    return false;
            }

            //every recorded harvest is older than the record
            return true;
        } catch (Exception e) {
            /*best effort: a failure while deciding must not stop the harvest, but it is logged
            *instead of being discarded by a return placed in a finally block
            */
            Utilities.logException(logger, e);
            return true;
        }
    }

    /**Saves critical data about a harvest.
     *
     * <p>When a harvest took place, a new database entity is created under a freshly
     * generated harvest id and filled with: the request url, the repository url, the verb
     * and its arguments (only for the GetRecord and ListRecords verbs), the response date,
     * the resumption token and the numbers of deleted and harvested documents. Each value
     * is stored only when it is set. The entity is then written to the database, which is
     * optimized afterwards.
     *
     * @param dataHarvested <code>true</code> if a harvest took place; otherwise nothing is saved
     * @throws SAXException wrapping any exception raised while building or storing the entity
     */
    protected void saveCriticalFields(boolean dataHarvested) throws SAXException {
        if (!dataHarvested)
            return;//nothing was harvested, nothing to record

        try {
            String harvestId = generateNewHarvestId();
            DatabaseEntity dbe = new DatabaseEntity(harvestId);

            //the guard must test the value that is stored: the request url, not the repository url
            if (Utilities.checkString(super.requestUrl))
                dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_REQUEST_URL, super.requestUrl);

            if (Utilities.checkString(super.repoUrl))
                dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_REPOSITORY_URL, super.repoUrl);

            if (requestParams != null) {
                String verb = requestParams.getParameter(OAIRequest.URL_PARAM_NAME_VERB, null);
                if (Utilities.checkString(verb)) {
                    boolean isGetRecord = verb.equals(OAIRequest.VERB_STRING_GET_RECORD);
                    boolean isListRecords = verb.equals(OAIRequest.VERB_STRING_LIST_RECORDS);

                    //the verb and the metadata prefix are common to both record verbs
                    if (isGetRecord || isListRecords) {
                        dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_VERB, verb);
                        String mdPrefix = requestParams.getParameter(OAIRequest.URL_PARAM_NAME_METADATA_PREFIX, null);
                        if (Utilities.checkString(mdPrefix))
                            dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_METADATA_PREFIX, mdPrefix);
                    }

                    if (isGetRecord) {
                        String id = requestParams.getParameter(OAIRequest.URL_PARAM_NAME_IDENTIFIER, null);
                        if (Utilities.checkString(id))
                            dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_IDENTIFIER, id);
                    }

                    if (isListRecords) {
                        String from = requestParams.getParameter(OAIRequest.URL_PARAM_NAME_FROM, null);
                        if (Utilities.checkString(from))
                            dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_FROM, from);

                        String until = requestParams.getParameter(OAIRequest.URL_PARAM_NAME_UNTIL, null);
                        if (Utilities.checkString(until))
                            dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_UNTIL, until);

                        String set = requestParams.getParameter(OAIRequest.URL_PARAM_NAME_SET, null);
                        if (Utilities.checkString(set))
                            dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_SET, set);
                    }
                }
            }

            dbe.setId(harvestId);
            //adding latest info.
            if (Utilities.checkString(super.responseDate))
                dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_HARVESTER_LAST_UPDATED, super.responseDate);
            if (Utilities.checkString(super.resumptionToken))
                dbe.addProperty(LuceneDocumentBaseOAIHarvester.OAI_HARVESTER_RESUMPTION_TOKEN, super.resumptionToken);
            if (deletedDocs != null)
                dbe.addProperty(LuceneDocumentBaseOAIHarvester.NO_DOCS_DELETED, Integer.toString(this.deletedDocs.size()));
            if (harvestedDocs != null)
                dbe.addProperty(LuceneDocumentBaseOAIHarvester.NO_DOCS_HARVESTED, Integer.toString(this.harvestedDocs.size()));

            this.database.update(dbe);
            this.database.optimize();
        } catch (Exception e) {
            throw new SAXException(e.getMessage(), e);
        }
    }

    /**Generates an id to associate
     * with a harvest.
     *
     * @return the next id produced by the harvester id generator
     */
    protected String generateNewHarvestId() {
        final String harvestId = this.harvesterIdGen.generate();
        return harvestId;
    }

    /**Sends sax events to the current consumer
     * with summary details of all the past harvests.
     *
     * <p>When the database holds no entity nothing is sent and neither the lock nor the
     * synchronized consumer is touched. Otherwise one OAI <code>request</code> element is sent
     * per entity, inside a wrapper element named by <code>Node.Name.PREVIOUS_HARVESTS</code>
     * in the SDX namespace. Every non-empty property of the entity becomes an attribute,
     * except the harvest id (skipped) and the repository url (sent as the element content).
     *
     * <p>The lock and the synchronized consumer are released only if they were
     * actually acquired by this call.
     *
     * @throws SAXException if a SAX event cannot be delivered (rethrown as is), or wrapping
     *                      any other failure; on interruption the interrupt status is restored
     */
    public void sendPastHarvestsSummary() throws SAXException {
        final String localName = fr.gouv.culture.sdx.utils.constants.Node.Name.PREVIOUS_HARVESTS;
        final String qualifiedName = Framework.SDXNamespacePrefix + ":" + localName;

        //remember what we really hold, so that the finally block stays balanced
        boolean lockAcquired = false;
        boolean consumerAcquired = false;
        try {
            DatabaseEntity[] dbes = this.database.getEntities();
            if (dbes == null || dbes.length == 0)
                return;//no past harvest to report

            this.acquire();
            lockAcquired = true;
            acquireSynchronizedXMLConsumer();
            consumerAcquired = true;

            super.startElement(Framework.SDXNamespaceURI, localName, qualifiedName, null);
            for (int i = 0; i < dbes.length; i++) {
                DatabaseEntity dbe = dbes[i];
                AttributesImpl atts = new AttributesImpl();
                String harvestedRepoUrl = null;
                Property[] props = (dbe != null) ? dbe.getProperties() : null;
                if (props != null) {
                    for (int j = 0; j < props.length; j++) {
                        Property prop = props[j];
                        if (prop == null)
                            continue;
                        String propName = prop.getName();
                        if (!Utilities.checkString(propName) || LuceneDocumentBaseOAIHarvester.OAI_HARVEST_ID.equals(propName))
                            continue;
                        String propVal = prop.getValue();
                        if (propName.equals(LuceneDocumentBaseOAIHarvester.OAI_REPOSITORY_URL))
                            harvestedRepoUrl = propVal;
                        else if (Utilities.checkString(propVal))
                            atts.addAttribute("", propName, propName, OAIObject.Node.Type.CDATA, propVal);
                    }
                }
                //an element is sent for every entry, even a null one (empty attributes, no content)
                sendElement(OAIObject.Node.Xmlns.OAI_2_0, OAIObject.Node.Name.REQUEST, OAIObject.Node.Name.REQUEST,
                        atts, harvestedRepoUrl);
            }
            super.endElement(Framework.SDXNamespaceURI, localName, qualifiedName);
        } catch (SAXException e) {
            throw e;//already the declared type, no need to wrap it again
        } catch (InterruptedException e) {
            //keep the interruption visible to the callers of this thread
            Thread.currentThread().interrupt();
            throw new SAXException(e.getMessage(), e);
        } catch (Exception e) {
            //TODO: what is the best way to handle this? (e.g. sending the SDXException to the consumer)
            throw new SAXException(e.getMessage(), e);
        } finally {
            if (consumerAcquired)
                releaseSynchronizedXMLConsumer();
            if (lockAcquired)
                this.release();
        }
    }

    /**Retrieves the time when the harvester was last updated.
     *
     * @return the last modification date reported by the underlying document base
     */
    public Date lastUpdated() {
        final Date lastModification = this.docbase.lastModificationDate();
        return lastModification;
    }

    /**Destroys all summary data pertaining to past harvests
     * but not the actual oai records harvested.
     *
     * <p>Every entity of the harvester database is deleted and the database is then
     * optimized. A failure is logged and not propagated.
     */
    public void purgePastHarvestsData() {
        try {
            DatabaseEntity[] dbes = this.database.getEntities();
            //sendPastHarvestsSummary() null-checks the same call, so an empty database may yield null
            if (dbes != null) {
                for (int i = 0; i < dbes.length; i++) {
                    this.database.delete(dbes[i]);
                }
            }
            this.database.optimize();
        } catch (SDXException e) {
            Utilities.logException(logger, e);
        }
    }

    /**Stores data about harvesting failures caused
     * by problems other than oai errors sent from
     * a queried repository.
     *
     * <p>A new database entity is created under a freshly generated harvest id. It holds
     * the failure message (or <code>"noMessage"</code> when the exception is <code>null</code> or has
     * no message) and, when set, the url of the request. A failure to store the entity is
     * logged and not propagated.
     *
     * @param e the exception that made the harvest fail, may be <code>null</code>
     */
    protected void storeFailedHarvestData(Exception e) {
        try {
            //TODO: call this method when harvesting fails/major exception thrown
            DatabaseEntity dbe = new DatabaseEntity(generateNewHarvestId());

            //special property "failedHarvest": keep the default unless the exception has a real message
            String message = "noMessage";
            if (e != null && Utilities.checkString(e.getMessage()))
                message = e.getMessage();
            dbe.addProperty(OAI_FAILED_HARVEST, message);

            //store the request url only when set, as saveCriticalFields() does for its properties
            if (Utilities.checkString(this.requestUrl))
                dbe.addProperty(OAI_REQUEST_URL, this.requestUrl);

            //save it to the database
            this.database.update(dbe);
            //TODO:later perhaps we can try to auto-reexecute these failed requests, keying on the OAI_FAILED_HARVEST property ???
        } catch (SDXException e1) {
            Utilities.logException(logger, e1);
        }
    }


}


