package de.duehl.html.download.logic;

/*
 * Copyright 2017 Christian Dühl. All rights reserved.
 *
 * This program is free software. You can redistribute it and/or
 * modify it under the same terms as perl:
 *
 * general:  http://dev.perl.org/licenses/
 * GPL:      http://dev.perl.org/licenses/gpl1.html
 * artistic: http://dev.perl.org/licenses/artistic.html
 */

import java.io.IOException;
import java.net.URI;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;

import de.duehl.basics.io.exceptions.IORuntimeException;
import de.duehl.basics.logging.Logger;
import de.duehl.basics.text.html.HtmlTool;
import de.duehl.html.download.data.DownloadInfo;
import de.duehl.html.download.data.DownloadParameters;
import de.duehl.html.download.data.DownloadStatus;
import de.duehl.html.download.data.RedirectHandling;
import de.duehl.html.download.proxy.RotatingProxies;

/**
 * Performs the actual download of a website through a CloseableHttpClient that is handed in by
 * the caller. The outcome (status, returned URL, page content, redirect target) is stored in the
 * DownloadInfo object given to the constructor.
 *
 * For links see InternalDownloader.
 *
 * @version 1.01     2017-06-21
 * @author Christian Dühl
 */

public class HttpGetter {

    private static final String REQUEST_HEADER_ACCEPT_LANGUAGE =
            "de-de,de;q=0.8,en-us;q=0.5,en;q=0.3";

    private static final String META_REDIRECT_START = "<meta http-equiv=\"refresh\" content=\"";

    /**
     * Matches a meta refresh header such as
     * <meta http-equiv="refresh" content="0; URL=http://www.spechtpartner.eu">
     * and captures the target URL in group 1. Compiled once instead of on every call.
     */
    private static final Pattern META_REDIRECT_PATTERN = Pattern.compile(
            META_REDIRECT_START + "\\d+; ?URL ?= ?([^\"]+)\"");

    /**
     * Recognises exception messages that hint at a blocked or unreachable proxy. The match is
     * case-insensitive because the JDK reports "Connection refused" with a lower case 'r', which
     * a case-sensitive test for "Connection Refused" never finds.
     */
    private static final Pattern PROXY_BLOCKED_PATTERN = Pattern.compile(
            "Connection (?:refused|timed out)", Pattern.CASE_INSENSITIVE);

    /** The parameters for the download. */
    private final DownloadParameters parameters;

    /** Information about the download (and the content of the page). */
    private final DownloadInfo downloadInfo;

    /**
     * Constructor.
     *
     * @param parameters
     *            The parameters for the download.
     * @param downloadInfo
     *            Information about the download (and the content of the page).
     */
    public HttpGetter(DownloadParameters parameters, DownloadInfo downloadInfo) {
        this.parameters = parameters;
        this.downloadInfo = downloadInfo;
    }

    /**
     * Executes the actual download on the given HttpClient.
     *
     * @param httpClient
     *            The client used to execute the GET request.
     * @throws IORuntimeException
     *             If executing the request or closing the response fails with an IOException.
     */
    public void loadHttpGet(CloseableHttpClient httpClient) {
        log("Start");
        HttpGet httpGet = new HttpGet(parameters.getWebsite());
        httpGet.addHeader("Accept-Language", REQUEST_HEADER_ACCEPT_LANGUAGE);

        loadHttpGet(httpClient, httpGet);
        log("Ende");
    }

    /** Runs the request and routes I/O failures to handleDownloadException(). */
    private void loadHttpGet(CloseableHttpClient httpClient, HttpGet httpGet) {
        try {
            tryToLoadHttpGet(httpClient, httpGet);
        }
        catch (IOException exception) {
            handleDownloadException(exception);
        }
    }

    /**
     * Executes the request and evaluates the response. Any problem while evaluating the response
     * marks the page as not found; problems while executing the request or closing the response
     * are propagated as IOException.
     */
    private void tryToLoadHttpGet(CloseableHttpClient httpClient, HttpGet httpGet)
            throws IOException, ClientProtocolException {
        log("Start");
        CloseableHttpResponse response = httpClient.execute(httpGet);
        log("response = " + response);

        try {
            workOnResponse(httpGet, response);
        }
        catch (Exception exception) {
            /*
             * Deliberately broad: every failure while evaluating the response is reported as
             * "page not found". The cause is logged so that it is not lost silently.
             */
            downloadInfo.pageNotFound();
            log("Website " + parameters.getWebsite() + " nicht gefunden");
            log("Failed to process response: " + exception);
        }
        finally {
            response.close();
        }
    }

    /**
     * Evaluates status code, content and redirect information of the response and stores the
     * result in the DownloadInfo.
     *
     * @throws HttpResponseException
     *             If the content of a response with an error status code is requested.
     * @throws IOException
     *             If reading the response fails or a redirect response has no Location header.
     */
    private void workOnResponse(HttpGet httpGet, CloseableHttpResponse response)
            throws HttpResponseException, IOException {
        int statusCode = getStatusCode(response);
        log("statusCode = " + statusCode);

        HttpEntity entity = response.getEntity();

        String content;
        String redirectUrlToFollow = "";
        if (statusCode == 302 || statusCode == 303) {
            redirectUrlToFollow = getRedirectByLocationHeader(response, statusCode);
            log("Status Code 302 / 303 via Location-Header: " + redirectUrlToFollow);
            content = "";
        }
        else if (statusCode == 301 || statusCode == 307) {
            redirectUrlToFollow = getRedirectByLocationHeader(response, statusCode);
            log("Status Code 301 / 307 via Location-Header: " + redirectUrlToFollow);
            content = "";
        }
        else if (statusCode == 200) {
            content = getHtmlContents(response);
            if (isRedirectByMetaHeader(content)) {
                log("Bei Status Code 200 Redirect über meta header im HTML gefunden!");
                redirectUrlToFollow = getRedirectByMetaHeader(content);
                if (redirectUrlToFollow.isEmpty()) {
                    log("Redirect über meta header im HTML - keine Seite gefunden");
                }
                else {
                    log("redirect auf " + redirectUrlToFollow);
                }
            }
        }
        else {
            content = getHtmlContents(response);
        }
        log("redirectUrlToFollow = " + redirectUrlToFollow);

        EntityUtils.consume(entity);

        String returnedUrl = getReturned(httpGet);
        log("returnedUrl = " + returnedUrl);

        DownloadStatus status = DownloadStatus.getFromStatusCode(statusCode);
        log("status = " + status);

        downloadInfo.storeResult(status, statusCode, returnedUrl, content, redirectUrlToFollow);
    }

    /**
     * Reads the redirect target from the Location header of a redirect response.
     *
     * @throws ClientProtocolException
     *             If the response carries no usable Location header. This replaces the
     *             NullPointerException that an unchecked header access would raise.
     */
    private String getRedirectByLocationHeader(CloseableHttpResponse response, int statusCode)
            throws ClientProtocolException {
        if (!response.containsHeader("Location")) {
            throw new ClientProtocolException(
                    "Redirect status " + statusCode + " without Location header");
        }
        String location = response.getFirstHeader("Location").getValue();
        if (null == location) {
            throw new ClientProtocolException(
                    "Redirect status " + statusCode + " with empty Location header");
        }
        return location;
    }

    /*
     * Example of a redirect:
     *  http://www.google.com
     *  http://tinyurl.com/2tx  -> http://www.google.com
     */

    /**
     * Determines whether the HTML code contains a redirect via meta header. Occurrences inside
     * HTML comments do not count.
     */
    private boolean isRedirectByMetaHeader(String content) {
        // Cheap test first: comments are only stripped if the marker occurs at all.
        return content.contains(META_REDIRECT_START)
                && HtmlTool.removeHtmlComments(content).contains(META_REDIRECT_START);
    }

    /**
     * Getter for the URL to follow in case of a redirect via meta header. If the redirect target
     * is not found, the empty string is returned.
     */
    private String getRedirectByMetaHeader(String content) {
        String cleanedContent = HtmlTool.removeHtmlComments(content);
        Matcher matcher = META_REDIRECT_PATTERN.matcher(cleanedContent);
        if (matcher.find()) {
            String redirect = matcher.group(1).trim();
            log("redirect = " + redirect);
            return redirect;
        }
        return "";
    }

    /** Returns the numeric HTTP status code of the response. */
    private int getStatusCode(CloseableHttpResponse response) {
        StatusLine statusLine = response.getStatusLine();
        return statusLine.getStatusCode();
    }

    /**
     * Fetches the body of the response as a Java string via BasicResponseHandler.
     *
     * The decoding is done by the handler based on the response entity, not unconditionally as
     * UTF-8. If the handler yields no content (response without entity), the empty string is
     * returned, so that callers never have to deal with null.
     *
     * @throws HttpResponseException
     *             If the handler rejects the response because of its status code.
     * @throws IOException
     *             If reading the response body fails.
     */
    private String getHtmlContents(CloseableHttpResponse response)
            throws HttpResponseException, IOException {
        String contents = new BasicResponseHandler().handleResponse(response);
        return null == contents ? "" : contents;
    }

    /** Determines the URL that was finally returned, depending on the redirect handling. */
    private String getReturned(HttpGet httpGet) {
        RedirectHandling redirectHandling = parameters.getRedirectHandling();
        if (redirectHandling.isRedirectHandledByCaller()) {
            return getReturnedUrlForHandledRedirectsByCaller(httpGet);
        }
        else {
            return getReturnedUrlForDefault(httpGet);
        }
    }

    /**
     * Returns the URI of the request itself; used when the caller follows redirects on its own.
     *
     * See
     * http://stackoverflow.com/questions/1456987/httpclient-4-how-to-capture-last-redirect-url
     */
    private String getReturnedUrlForHandledRedirectsByCaller(HttpGet httpGet) {
        URI returned = httpGet.getURI();
        String returnedUrl = returned.toString();
        log("returnedUrl = " + returnedUrl);
        return returnedUrl;
    }

    /**
     * Not implemented yet: determining the final URL when the HttpClient follows redirects on
     * its own. Always throws; within this class the exception ends up in the generic handling
     * of tryToLoadHttpGet().
     *
     * See
     * http://stackoverflow.com/questions/1456987/httpclient-4-how-to-capture-last-redirect-url
     * http://stackoverflow.com/questions/10341475/
     *       getting-url-after-a-redirect-using-httpclient-executehttpget
     *
     * ML:
     *
     * 301 302 306 307
     * Some of them can also be 200...
     * -> ModuleFindTrueWebSite
     *
     * A candidate solution is to execute the request with an HttpContext and to read the
     * attributes ExecutionContext.HTTP_REQUEST and ExecutionContext.HTTP_TARGET_HOST from it
     * afterwards (current URI, prefixed with the target host if the URI is not absolute). That
     * approach is partly deprecated and unclear. A solution is still needed, but not urgently,
     * because following redirects in the caller removes the necessity. Simply falling back to
     * getReturnedUrlForHandledRedirectsByCaller() is not an option, as the URLs obtained that
     * way cannot be trusted when the client follows redirects itself.
     *
     * @param httpGet
     *            The executed request (currently unused).
     * @throws UnsupportedOperationException
     *             Always.
     */
    private String getReturnedUrlForDefault(HttpGet httpGet) {
        throw new UnsupportedOperationException("No solution yet!");
    }

    /**
     * Marks the download as failed, possibly switches the rotating proxy and rethrows the
     * exception wrapped into an IORuntimeException.
     */
    private void handleDownloadException(IOException exception) {
        log("Download failed");
        downloadInfo.failed();

        perhapsRotateRotatingProxy(exception.getMessage());

        throw new IORuntimeException(exception);
    }

    /**
     * Switches to the next rotating proxy if the error message looks like a refused or timed
     * out connection and rotating proxies are in use.
     *
     * Other candidates that are not handled yet: handshake_failure, 443.
     *
     * Observed in practice (on a single line):
     *     Connect to firewall40.heinsundpartner.de:3128
     *     [firewall40.heinsundpartner.de/192.168.32.40] failed:
     *     Connection timed out: connect
     * In that case "Proxy geblockt?!" is logged and switching the proxy works, but the download
     * is not tried again.
     *
     * @param message
     *            The message of the exception, may be null.
     */
    private void perhapsRotateRotatingProxy(String message) {
        if (null == message) {
            return;
        }

        if (PROXY_BLOCKED_PATTERN.matcher(message).find()) {
            // TODO: look closely at the result when a proxy is blocked!
            log("Proxy geblockt?!");
            if (parameters.useRotatingProxies()) {
                RotatingProxies rotatingProxies = parameters.getRotatingProxies();
                rotatingProxies.switchProxy();
                log("Schalte rotierenden Proxy weiter auf " + rotatingProxies.getProxy());
            }
        }
    }

    /** Logs the given message if a logger is available. */
    private void log(String message) {
        if (parameters.weHaveALogger()) {
            Logger logger = parameters.getLogger();
            logger.log(message, 1);
        }
    }

}
