Deprecated: 函数 get_currentuserinfo 自版本 4.5.0 起已弃用!请使用 wp_get_current_user() 替代。 in /data/home/qxu1142130176/htdocs/wp-includes/functions.php on line 5383
最新消息:

JSOUP 抓取HTTPS/HTTP网页校验问题

Java 前端收藏 1830浏览

JSOUP通过HTTP抓取网页时能够正常工作,但是抓取HTTPS网页时,它会引发以下异常:

javax.net.ssl.SSLHandshakeException: java.security.cert.CertPathValidatorException: Trust anchor for certification path not found.

通过以下代码可以实现HTTPS网页的抓取:它让程序信任所有站点(即跳过证书链校验和主机名校验),只用到JDK自带的 javax.net.ssl 类,不需要额外引入依赖包。注意:跳过校验后连接不再能防范中间人攻击,请仅在测试或抓取公开页面时使用:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;


/**
 * Helper that switches off TLS certificate and host name verification for
 * connections created through {@link HttpsURLConnection}.
 *
 * <p>WARNING: the settings installed here are JVM-wide defaults of
 * {@link HttpsURLConnection}. With them in place any certificate presented by
 * any server is accepted, so connections are no longer protected against
 * man-in-the-middle attacks. Use only for testing or for fetching public,
 * non-sensitive pages.
 */
public class HttpsUtil {
    /**
     * Logger used to report a failure to install the trust-all configuration.
     */
    private static final Logger logger = LoggerFactory.getLogger(HttpsUtil.class);

    /**
     * Trusts every site so that HTTPS pages can be fetched without certificate errors.
     *
     * <p>Installs a default SSL socket factory whose trust manager accepts every
     * certificate chain, and a default host name verifier that accepts every host.
     * Any failure is logged and the method returns normally; it never throws.
     */
    public static void trustEveryone() {
        try {
            // Build the complete context first, so that a failure here leaves the
            // HttpsURLConnection defaults untouched instead of half-configured.
            SSLContext context = SSLContext.getInstance("TLS");
            context.init(null, new X509TrustManager[]{new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                    // Intentionally empty: every client certificate chain is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                    // Intentionally empty: every server certificate chain is accepted.
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            }}, new SecureRandom());

            HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
            HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
                @Override
                public boolean verify(String hostname, SSLSession session) {
                    // Accept the connection regardless of which host name the certificate names.
                    return true;
                }
            });
        } catch (Exception e) {
            // Best effort: keep the caller running, but do not hide the failure.
            logger.error("Failed to install trust-all HTTPS configuration", e);
        }
    }

}

最后,在使用JSOUP请求HTTPS页面之前,先调用 HttpsUtil.trustEveryone() 即可:

String pageurl = "https://www.baidu.com";
logger.info("〓〓〓〓〓获取的URL:{}",pageurl);
// Install the trust-all TLS settings before the HTTPS request is issued.
HttpsUtil.trustEveryone();
Document doc = JsoupUtil.getHtmlDoc(pageurl);

 

转载请注明:前端收藏 » JSOUP 抓取HTTPS/HTTP网页校验问题