/*
 * Decompiled with CFR 0.152.
 */
package com.pageseeder.url;

import com.pageseeder.base.rule.HostRule;
import com.pageseeder.common.net.URLs;
import com.pageseeder.common.properties.GlobalSettings;
import com.pageseeder.common.properties.Settings;
import com.pageseeder.common.util.Rules;
import com.pageseeder.db.Database;
import com.pageseeder.db.DatabaseException;
import com.pageseeder.db.QueryFailedException;
import com.pageseeder.db.model.Host;
import com.pageseeder.db.util.URIs;
import com.pageseeder.url.URLMetadata;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.URL;
import java.net.UnknownHostException;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.SSLException;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public final class URLMetadataExtractor {
    /** Logger for this class; it is declared public, so it is part of the class's visible surface. */
    public static final Logger LOGGER = LoggerFactory.getLogger(URLMetadataExtractor.class);
    /**
     * Default timeout. The overloads that take no timeout argument pass this same value (45000)
     * on to {@code setConnectTimeout}/{@code setReadTimeout}, which interpret it as milliseconds.
     */
    public static final int DEFAULT_TIMEOUT = 45000;
    /**
     * Cache assigned once by the static initializer from the cache named "url-hosts".
     * Keys are lower-cased host names, values are the {@link LocalDateTime} at which a request
     * to that host was last recorded.
     */
    private static Cache hostsCache;

    /** Private constructor: the class only exposes static methods and is not meant to be instantiated. */
    private URLMetadataExtractor() {
    }

    /**
     * Normalises a host name into the key used by the hosts cache.
     *
     * <p>Uses {@link Locale#ROOT} so the key does not depend on the JVM default locale
     * (for example the Turkish dotless i).
     *
     * @param host a non-null host name
     * @return the lower-cased host name
     */
    private static String hostKey(String host) {
        return host.toLowerCase(Locale.ROOT);
    }

    /**
     * Records the current time as the moment of the latest request to the given host.
     *
     * <p>Nothing is recorded when the host is {@code null} or when the "url-hosts" cache
     * could not be obtained during class initialisation.
     *
     * @param host the host name being requested
     */
    private static void recordHostRequest(String host) {
        if (host == null || hostsCache == null) {
            return;
        }
        String key = URLMetadataExtractor.hostKey(host);
        hostsCache.put(new Element((Serializable) key, (Serializable) LocalDateTime.now()));
    }

    /**
     * Indicates whether a new request to the host should be delayed because another request
     * to the same host was recorded recently.
     *
     * <p>The delay, in seconds, is read from the global property {@code urlRequestDelay}.
     *
     * @param host the host name about to be requested
     * @return {@code true} if the last recorded request to this host happened at most
     *         {@code urlRequestDelay} seconds ago; {@code false} if no request was recorded,
     *         the recorded time lies in the future, the property is not a number, the host is
     *         {@code null} or the hosts cache is unavailable
     */
    public static boolean delayHostRequest(String host) {
        if (host == null || hostsCache == null) {
            return false;
        }
        Element el = hostsCache.get((Serializable) URLMetadataExtractor.hostKey(host));
        if (el == null) {
            return false;
        }
        LocalDateTime accessed = (LocalDateTime) el.getObjectValue();
        try {
            long delay = Long.parseLong(GlobalSettings.get("urlRequestDelay"));
            LocalDateTime now = LocalDateTime.now();
            // A recorded time in the future (e.g. after a clock change) never delays the request.
            if (now.isBefore(accessed)) {
                return false;
            }
            return ChronoUnit.SECONDS.between(accessed, now) <= delay;
        } catch (NumberFormatException ex) {
            LOGGER.error("Global property urlRequestDelay must be a number");
            return false;
        }
    }

    /**
     * Extracts either the full metadata or only the status of a URL.
     *
     * @param url        the URL to inspect
     * @param db         the database handed to the host check
     * @param statusOnly {@code true} to only compute the status via {@code checkStatus}
     * @return the metadata for the URL; when {@code statusOnly} is set it only carries the status
     */
    public static URLMetadata extract(String url, Database db, boolean statusOnly) {
        if (!statusOnly) {
            return URLMetadataExtractor.extract(url, db);
        }
        URLMetadata.URLStatus status = URLMetadataExtractor.checkStatus(url, db);
        return URLMetadata.status(url, status);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    /**
     * Determines the status of a URL without extracting any metadata.
     *
     * <p>After the host check, a {@code HEAD} request is sent; if it does not answer 200 the
     * request is repeated with {@code GET}. Each connection is disconnected as soon as its
     * response code has been read, including when the request fails.
     *
     * @param url the URL to check; only {@code http://} and {@code https://} are supported
     * @param db  the database handed to the host check
     * @return the status derived from the host check, the HTTP response code or the failure
     */
    public static URLMetadata.URLStatus checkStatus(String url, Database db) {
        if (!url.startsWith("http://") && !url.startsWith("https://")) {
            return URLMetadata.URLStatus.UNSUPPORTED_SCHEME;
        }
        try {
            URL instance = URI.create(url).toURL();
            URLMetadata.URLStatus hostStatus = URLMetadataExtractor.checkHost(instance, db);
            if (hostStatus != null) {
                return hostStatus;
            }
            int httpCode = URLMetadataExtractor.requestStatusCode(instance, "HEAD");
            if (httpCode != HttpURLConnection.HTTP_OK) {
                // Retry with GET whenever HEAD does not answer 200.
                httpCode = URLMetadataExtractor.requestStatusCode(instance, "GET");
            }
            return URLMetadataExtractor.toStatus(httpCode);
        } catch (IllegalArgumentException | MalformedURLException ex) {
            return URLMetadata.URLStatus.MALFORMED;
        } catch (SSLException ex) {
            return URLMetadata.URLStatus.HAS_ERROR;
        } catch (DatabaseException ex) {
            LOGGER.error(ex.getMessage(), (Throwable) ex);
            return URLMetadata.URLStatus.HAS_ERROR;
        } catch (IOException ex) {
            return URLMetadata.URLStatus.UNREACHABLE;
        } catch (Exception ex) {
            return URLMetadata.URLStatus.HAS_ERROR;
        }
    }

    /**
     * Sends a single request with the given method and returns the HTTP response code,
     * always disconnecting the connection afterwards.
     *
     * @param url    the URL to request
     * @param method the HTTP method, e.g. {@code "HEAD"} or {@code "GET"}
     * @return the HTTP response code
     * @throws IOException if the request fails
     */
    private static int requestStatusCode(URL url, String method) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            URLMetadataExtractor.setConnectionDefaults(connection, DEFAULT_TIMEOUT);
            connection.setRequestMethod(method);
            return connection.getResponseCode();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Maps an HTTP response code to a URL status.
     *
     * @param httpCode the HTTP response code
     * @return the matching status; {@code HAS_ERROR} for any code without a dedicated status
     */
    private static URLMetadata.URLStatus toStatus(int httpCode) {
        if (httpCode == HttpURLConnection.HTTP_OK) {
            return URLMetadata.URLStatus.OK;
        }
        if (httpCode == HttpURLConnection.HTTP_NOT_FOUND) {
            return URLMetadata.URLStatus.NOT_FOUND;
        }
        if (httpCode == HttpURLConnection.HTTP_FORBIDDEN || httpCode == HttpURLConnection.HTTP_UNAUTHORIZED) {
            return URLMetadata.URLStatus.REQUIRES_AUTHENTICATION;
        }
        if (httpCode >= 300 && httpCode < 400) {
            return URLMetadata.URLStatus.REDIRECT;
        }
        return URLMetadata.URLStatus.HAS_ERROR;
    }

    /**
     * Extracts the metadata of a URL using the default timeout.
     *
     * @param url the URL to fetch
     * @param db  the database handed to the host check
     * @return the metadata extracted for the URL
     */
    public static URLMetadata extract(String url, Database db) {
        int timeout = DEFAULT_TIMEOUT;
        return URLMetadataExtractor.extract(url, db, timeout);
    }

    /**
     * Fetches a URL with a {@code GET} request and extracts its metadata.
     *
     * <p>The connection is always disconnected before this method returns, whether the
     * metadata could be fetched or not.
     *
     * @param url     the URL to fetch; only {@code http://} and {@code https://} are supported
     * @param db      the database handed to the host check
     * @param timeout the connect and read timeout handed to the connection
     * @return the extracted metadata, or a metadata object carrying the status that explains
     *         why nothing could be extracted
     */
    public static URLMetadata extract(String url, Database db, int timeout) {
        if (!url.startsWith("http://") && !url.startsWith("https://")) {
            return URLMetadata.unsupportedScheme(url);
        }
        try {
            URL instance = URI.create(url).toURL();
            URLMetadata.URLStatus status = URLMetadataExtractor.checkHost(instance, db);
            if (status != null) {
                return URLMetadata.status(url, status);
            }
            HttpURLConnection connection = (HttpURLConnection) instance.openConnection();
            try {
                URLMetadataExtractor.setConnectionDefaults(connection, timeout);
                connection.setRequestMethod("GET");
                return URLMetadataExtractor.fetchMetadata(connection);
            } finally {
                // Release the connection on every path, including failures while fetching.
                connection.disconnect();
            }
        } catch (IllegalArgumentException | MalformedURLException ex) {
            return URLMetadata.malformed(url);
        } catch (SocketException | SocketTimeoutException | UnknownHostException ex) {
            return URLMetadata.unreachable(url);
        } catch (SSLException ex) {
            return URLMetadata.status(url, URLMetadata.URLStatus.HAS_ERROR, ex.getMessage());
        } catch (DatabaseException ex) {
            LOGGER.error(ex.getMessage(), (Throwable) ex);
            return URLMetadata.status(url, URLMetadata.URLStatus.HAS_ERROR, ex.getMessage());
        } catch (IOException ex) {
            return URLMetadata.status(url, URLMetadata.URLStatus.UNREACHABLE, ex.getMessage());
        } catch (Exception ex) {
            return URLMetadata.status(url, URLMetadata.URLStatus.HAS_ERROR, ex.getMessage());
        }
    }

    /**
     * Extracts the metadata of a URL without a database.
     *
     * @param url the URL to fetch
     * @return the metadata extracted for the URL
     * @deprecated behaves exactly like {@link #extractNoHostCheck(String)}.
     */
    @Deprecated
    public static URLMetadata extract(String url) {
        Database none = null;
        return URLMetadataExtractor.extract(url, none);
    }

    /**
     * Extracts the metadata of a URL without a database, so the host is not looked up in it.
     *
     * @param url the URL to fetch
     * @return the metadata extracted for the URL
     */
    public static URLMetadata extractNoHostCheck(String url) {
        Database none = null;
        return URLMetadataExtractor.extract(url, none);
    }

    /**
     * Checks the host of a URL before any request is made.
     *
     * <p>When the check does not produce a status, the request to the host is recorded under
     * the resolved host's name, or under the URL's host name if no host was resolved.
     *
     * @param url the URL about to be requested
     * @param db  the database used to resolve the host; the lookup is skipped when {@code null}
     * @return the status that makes a request unnecessary, or {@code null} if the URL should be requested
     * @throws QueryFailedException declared for the host lookup
     */
    private static URLMetadata.URLStatus checkHost(URL url, Database db) throws QueryFailedException {
        String hostname = url.getHost();
        if (!Rules.isValidHost(hostname)) {
            return URLMetadata.URLStatus.INVALID_HOST;
        }
        Host resolved = db == null ? null : HostRule.resolveHost(db, hostname);
        if (resolved != null) {
            if (resolved.isVirtual()) {
                return URLMetadata.URLStatus.VIRTUAL;
            }
            boolean underSitePrefix = !resolved.isExternal() && url.getPath().startsWith(GlobalSettings.getSitePrefix());
            if (underSitePrefix) {
                return URIs.matchesPSSourcePath(url.getPath())
                        ? URLMetadata.URLStatus.PS_SOURCE
                        : URLMetadata.URLStatus.INTERNAL;
            }
        }
        String recorded = resolved == null ? hostname : resolved.getName();
        URLMetadataExtractor.recordHostRequest(recorded);
        return null;
    }

    /**
     * Applies the common settings to a connection before it is used: redirects are followed,
     * caching is disabled, both timeouts are set and browser-like request headers are added.
     *
     * @param connection the connection to configure; it must not be connected yet
     * @param timeout    the connect and read timeout, in milliseconds
     */
    private static void setConnectionDefaults(HttpURLConnection connection, int timeout) {
        connection.setInstanceFollowRedirects(true);
        // setUseCaches applies to this connection. setDefaultUseCaches would only change the
        // default picked up by connections created afterwards, and would not affect this one.
        connection.setUseCaches(false);
        connection.setConnectTimeout(timeout);
        connection.setReadTimeout(timeout);
        connection.addRequestProperty("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        connection.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36");
        // The referer is built from the server scheme, the "webSiteAddress" property and the server port.
        String referer = URLs.asString(Settings.getServerScheme(), GlobalSettings.get("webSiteAddress"), Settings.getServerPort(), "/");
        connection.addRequestProperty("Referer", referer);
    }

    /**
     * Reads the response of a configured connection and turns it into metadata.
     *
     * <p>For a 200 response the HTTP code, media type and content length are recorded and, if
     * the content is HTML, the document is parsed for further metadata. Any other response
     * code is mapped to a status-only result. The connection is disconnected on every path.
     *
     * @param connection the configured connection to read from
     * @return the metadata, possibly carrying warnings, or a status-only result for non-200 responses
     * @throws IOException if the response cannot be read
     */
    private static URLMetadata fetchMetadata(HttpURLConnection connection) throws IOException {
        try {
            HashMap<String, String> metadata = new HashMap<String, String>();
            String url = connection.getURL().toString();
            int httpCode = connection.getResponseCode();
            String contentType = connection.getContentType();
            LOGGER.debug("Fetching metadata for {} - {}", (Object) url, (Object) httpCode);
            if (httpCode != 200) {
                if (httpCode == 404) {
                    return URLMetadata.notFound(url);
                }
                if (httpCode == 403 || httpCode == 401) {
                    return URLMetadata.requiresAuthentication(url);
                }
                if (httpCode >= 300 && httpCode < 400) {
                    String location = connection.getHeaderField("Location");
                    return URLMetadata.redirect(url, location);
                }
                return URLMetadata.status(url, URLMetadata.URLStatus.HAS_ERROR, "HTTP code: " + httpCode);
            }
            metadata.put("http-code", Integer.toString(httpCode));
            String mediaType = URLMetadataExtractor.toMediaType(contentType);
            if (mediaType != null) {
                metadata.put("media-type", mediaType);
            }
            // The long variant still reports the length of bodies larger than Integer.MAX_VALUE.
            long size = connection.getContentLengthLong();
            if (size >= 0) {
                metadata.put("size", String.valueOf(size));
                metadata.put("content-length", String.valueOf(size));
            }
            List<String> warnings = Collections.emptyList();
            if (URLMetadataExtractor.isHtml(contentType)) {
                String charset = URLMetadataExtractor.toCharset(contentType);
                try (Reader in = new InputStreamReader(connection.getInputStream(), charset)) {
                    warnings = URLMetadataExtractor.parseHtmlContent(in, metadata, url);
                } catch (Exception ex) {
                    // Best effort: keep what was collected so far and report the failure as a warning.
                    String message = ex.getMessage();
                    warnings = Collections.singletonList(message != null ? message : ex.getClass().getName());
                }
            }
            if (!warnings.isEmpty()) {
                return URLMetadata.warning(url, metadata, String.join(",", warnings));
            }
            return new URLMetadata(url, metadata);
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Indicates whether a {@code Content-Type} header value denotes an HTML or XHTML document.
     *
     * <p>The comparison ignores case and leading whitespace, since media types are
     * case-insensitive.
     *
     * @param contentType the header value, may be {@code null}
     * @return {@code true} if the value starts with {@code text/html} or {@code application/xhtml+xml}
     */
    private static boolean isHtml(String contentType) {
        if (contentType == null) {
            return false;
        }
        String type = contentType.trim();
        String html = "text/html";
        String xhtml = "application/xhtml+xml";
        return type.regionMatches(true, 0, html, 0, html.length())
                || type.regionMatches(true, 0, xhtml, 0, xhtml.length());
    }

    /**
     * Parses HTML content with TagSoup and collects metadata from it.
     *
     * @param reader   the reader supplying the HTML content
     * @param metadata the map the handler writes the collected metadata into
     * @param url      the URL of the document, handed to the handler
     * @return the warnings collected by the handler while parsing
     * @throws SAXException if the parser reports an error
     * @throws IOException  if the content cannot be read
     */
    private static List<String> parseHtmlContent(Reader reader, Map<String, String> metadata, String url) throws SAXException, IOException {
        HtmlHandler collector = new HtmlHandler(url, metadata);
        HTMLSchema schema = new HTMLSchema();
        Parser tagsoup = new Parser();
        tagsoup.setProperty("http://www.ccil.org/~cowan/tagsoup/properties/schema", (Object) schema);
        tagsoup.setFeature("http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons", true);
        tagsoup.setContentHandler((ContentHandler) collector);
        InputSource source = new InputSource(reader);
        tagsoup.parse(source);
        return collector._warnings;
    }

    /**
     * Indicates whether a {@code <meta>} entry with the given name should be kept as metadata.
     *
     * <p>Site verification tokens, browser hints and CSRF tokens are rejected. All comparisons
     * ignore case, including the {@code csrf-} and {@code msapplication-} prefixes.
     *
     * @param name the value of the {@code name} or {@code property} attribute, may be {@code null}
     * @return {@code false} if the name is {@code null} or rejected, {@code true} otherwise
     */
    private static boolean acceptMeta(String name) {
        if (name == null) {
            return false;
        }
        String[] rejectedNames = {
            "google-site-verification",
            "baidu-site-verification",
            "msvalidate.01",
            "p:domain_verify",
            "viewport",
            "referrer",
            "robots"
        };
        for (String rejected : rejectedNames) {
            if (rejected.equalsIgnoreCase(name)) {
                return false;
            }
        }
        String[] rejectedPrefixes = {"csrf-", "msapplication-"};
        for (String prefix : rejectedPrefixes) {
            if (name.regionMatches(true, 0, prefix, 0, prefix.length())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Indicates whether a {@code <link>} element with the given relation should be kept as metadata.
     *
     * @param rel the value of the {@code rel} attribute, may be {@code null}
     * @return {@code true} if the relation is one of the accepted ones, ignoring case
     */
    private static boolean acceptLink(String rel) {
        String[] accepted = {
            "apple-touch-icon",
            "icon",
            "shortcut icon",
            "fluid-icon",
            "shortlink",
            "canonical",
            "license"
        };
        for (String candidate : accepted) {
            if (candidate.equalsIgnoreCase(rel)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Turns the {@code href} of a link into an absolute URL.
     *
     * <p>Whitespace around the href is ignored. Absolute {@code http(s)} URLs are returned as
     * they are, scheme-relative references ({@code //host/path}) inherit the scheme of the
     * document, and anything else is resolved against the document URL.
     *
     * @param href the href to fix, must not be {@code null}
     * @param url  the URL of the document containing the link
     * @return the absolute URL
     * @throws IllegalArgumentException if the href is blank or cannot be resolved as a URI
     */
    private static String fixHref(String href, String url) {
        String target = href.trim();
        if (target.isEmpty()) {
            throw new IllegalArgumentException("Blank href");
        }
        if (target.startsWith("http://") || target.startsWith("https://")) {
            return target;
        }
        if (target.startsWith("//")) {
            String scheme = url.substring(0, url.indexOf(':') + 1);
            return scheme + target;
        }
        URI base = URI.create(url);
        String basePath = base.getRawPath();
        if (basePath != null && basePath.isEmpty() && base.getRawAuthority() != null) {
            // URI.resolve glues a relative path directly onto the authority when the base has
            // no path ("https://host" + "a.png" gives "https://hosta.png"), so start from "/".
            base = base.resolve("/");
        }
        return base.resolve(target).toString();
    }

    /**
     * Extracts the media type from a {@code Content-Type} header value by dropping any
     * parameters and the whitespace around the type.
     *
     * @param contentType the header value, may be {@code null}
     * @return the part before the first {@code ';'} without surrounding whitespace,
     *         or {@code null} if the header value is {@code null}
     */
    private static String toMediaType(String contentType) {
        if (contentType == null) {
            return null;
        }
        int semicolon = contentType.indexOf(';');
        String mediaType = semicolon == -1 ? contentType : contentType.substring(0, semicolon);
        // Whitespace is allowed around the ';' separator and is not part of the media type.
        return mediaType.trim();
    }

    /** Matches the {@code charset} parameter of a content type, with or without double quotes. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile(
            "charset=(([^()<>@,;:\"/\\[\\]?.=\\s]+)|(\"[^()<>@,;:\"/\\[\\]?.=\\s]+\"))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Extracts the charset from a {@code Content-Type} header value.
     *
     * @param contentType the header value, may be {@code null}
     * @return the charset without quotes, {@code "utf-8"} if the header has no charset parameter,
     *         or {@code null} if the header value is {@code null}
     */
    private static String toCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(contentType);
        if (!matcher.find()) {
            return "utf-8";
        }
        return matcher.group(1).replace("\"", "");
    }

    // Looks up the "url-hosts" cache once, when the class is initialised.
    static {
        hostsCache = CacheManager.getInstance().getCache("url-hosts");
    }

    /**
     * SAX handler collecting metadata from an HTML document: the document language, the title,
     * the accepted {@code <meta>} entries and the accepted {@code <link>} elements.
     *
     * <p>Problems found on the way are collected as warnings rather than reported as errors.
     */
    private static class HtmlHandler
    extends DefaultHandler {
        /** Maximum length of a key or value, also used as the limit on the number of keys. */
        private static final int MAX_LENGTH = 1024;
        /** Characters allowed in the {@code lang} attribute of the document element. */
        private static final Pattern LANG_PATTERN = Pattern.compile("^([a-zA-Z0-9\\s,-])*$");

        private final String _url;
        private final Map<String, String> _metadata;
        private final List<String> _warnings = new ArrayList<String>();
        /** Buffer for the title being read; {@code null} outside of a title element. */
        private StringBuilder title = null;

        /**
         * @param url      the URL of the document, used to resolve relative links
         * @param metadata the map the collected metadata is written into
         */
        HtmlHandler(String url, Map<String, String> metadata) {
            this._url = url;
            this._metadata = metadata;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
            if ("html".equalsIgnoreCase(localName)) {
                this.readLanguage(atts);
            } else if ("title".equalsIgnoreCase(localName) && this._metadata.get("title") == null) {
                // Only the first title of the document is kept.
                this.title = new StringBuilder();
            } else if ("meta".equalsIgnoreCase(localName)) {
                this.readMeta(atts);
            } else if ("link".equalsIgnoreCase(localName)) {
                this.readLink(atts);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("title".equalsIgnoreCase(localName) && this.title != null) {
                String actualTitle = this.title.toString().trim().replaceAll("\\s+", " ");
                if (actualTitle.length() > MAX_LENGTH) {
                    this._warnings.add("The title was truncated");
                    this._metadata.put("title", actualTitle.substring(0, MAX_LENGTH));
                } else {
                    this._metadata.put("title", actualTitle);
                }
                this.title = null;
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) {
            if (this.title != null) {
                this.title.append(ch, start, length);
            }
        }

        /** Stores the {@code lang} attribute of the document element as "content-language" if it looks valid. */
        private void readLanguage(Attributes atts) {
            String lang = atts.getValue("lang");
            if (lang == null) {
                return;
            }
            if (lang.length() < MAX_LENGTH && LANG_PATTERN.matcher(lang).matches()) {
                this._metadata.put("content-language", lang);
            } else {
                this._warnings.add("Ignored invalid lang value");
            }
        }

        /** Stores the content of a meta element under its accepted {@code name} and/or {@code property}. */
        private void readMeta(Attributes atts) {
            String value = atts.getValue("content");
            if (value == null || value.isEmpty()) {
                return;
            }
            String content = value.trim();
            String name = atts.getValue("name");
            if (URLMetadataExtractor.acceptMeta(name)) {
                this.putOrMerge(name.toLowerCase(Locale.ROOT), content);
            }
            String property = atts.getValue("property");
            if (URLMetadataExtractor.acceptMeta(property)) {
                this.putOrMerge(property.toLowerCase(Locale.ROOT), content);
            }
        }

        /** Stores the absolute href of an accepted link, once per declared size and once under the bare relation. */
        private void readLink(Attributes atts) {
            String rel = atts.getValue("rel");
            String href = atts.getValue("href");
            if (!URLMetadataExtractor.acceptLink(rel) || href == null || href.isEmpty()) {
                return;
            }
            String key = rel.toLowerCase(Locale.ROOT);
            try {
                String resolved = URLMetadataExtractor.fixHref(href, this._url);
                String sizes = atts.getValue("sizes");
                // Trimming first avoids an empty leading token, which would create a "<rel>-" key.
                String sizeList = sizes == null ? "" : sizes.trim().toLowerCase(Locale.ROOT);
                if (sizeList.isEmpty()) {
                    this._metadata.put(key, resolved);
                    return;
                }
                for (String size : sizeList.split("\\s+")) {
                    this._metadata.put(key + "-" + size, resolved);
                }
                // The bare relation keeps the first href seen for it.
                if (!this._metadata.containsKey(key)) {
                    this._metadata.put(key, resolved);
                }
            } catch (IllegalArgumentException ex) {
                this._warnings.add("Ignoring invalid href '" + href + "' in link metadata.");
            }
        }

        /**
         * Stores a value under a key, appending it to the existing value with a comma if the
         * key is already present. Keys and values are limited to {@link #MAX_LENGTH} characters.
         */
        private void putOrMerge(String key, String value) {
            if (this._metadata.size() > MAX_LENGTH) {
                this.warnOnce("Ignored additional metadata, reached max allowed keys");
                return;
            }
            if (key.length() > MAX_LENGTH) {
                this.warnOnce("Ignored metadata key (too long)");
                return;
            }
            String truncated = "Value for '" + key + "' was truncated.";
            String val = value;
            if (value.length() > MAX_LENGTH) {
                val = value.substring(0, MAX_LENGTH);
                this.warnOnce(truncated);
            }
            this._metadata.merge(key, val, (a, b) -> {
                String merged = a + "," + b;
                if (merged.length() > MAX_LENGTH) {
                    this.warnOnce(truncated);
                    return merged.substring(0, MAX_LENGTH);
                }
                return merged;
            });
        }

        /** Adds a warning unless the same warning was already recorded. */
        private void warnOnce(String warning) {
            if (!this._warnings.contains(warning)) {
                this._warnings.add(warning);
            }
        }
    }
}

