Using Smart Proxy Manager with Java

Note

Because of HTTPCLIENT-1649 you should use version 4.5 or later of HttpComponents Client.

This is how the Apache HttpComponents example looks with Smart Proxy Manager support:

import java.io.File;
import javax.net.ssl.SSLContext;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;

    /**
     * Example: fetch a page over HTTPS through Smart Proxy Manager with Apache
     * HttpComponents Client, sending the proxy credentials pre-emptively.
     */
    public class ClientProxyAuthentication {

        /**
         * Smart Proxy Manager endpoint. The same host and port are used for the
         * credentials scope and for the request route, so they are defined once.
         */
        private static final String PROXY_HOST = "proxy.zyte.com";
        private static final int PROXY_PORT = 8011;

        /** Placeholder: replace with your API key. It is used as the user name, with an empty password. */
        private static final String API_KEY = "<API KEY>";

        /** Placeholder: path of the trust store loaded into the SSL context. */
        private static final String TRUST_STORE_PATH = "/path/to/jre/lib/security/cacerts";

        /**
         * Sends {@code GET /} to {@code https://twitter.com} via the proxy and prints
         * the status line and the response body to standard output.
         *
         * @param args ignored
         * @throws Exception if the SSL context cannot be built or the request fails
         */
        public static void main(String[] args) throws Exception {

            HttpHost proxy = new HttpHost(PROXY_HOST, PROXY_PORT);
            HttpHost target = new HttpHost("twitter.com", 443, "https");

            // Trust own CA and all self-signed certs
            SSLContext sslcontext = SSLContexts.custom()
                    .loadTrustMaterial(new File(TRUST_STORE_PATH),
                                       "changeit".toCharArray(),
                                       new TrustSelfSignedStrategy())
                    .build();

            // Allow TLSv1.2 protocol only
            SSLConnectionSocketFactory sslsf = new SSLConnectionSocketFactory(
                    sslcontext, new String[] {"TLSv1.2"},
                    null,
                    SSLConnectionSocketFactory.getDefaultHostnameVerifier());

            // Credentials are scoped to the proxy endpoint only.
            CredentialsProvider credsProvider = new BasicCredentialsProvider();
            credsProvider.setCredentials(
                    new AuthScope(PROXY_HOST, PROXY_PORT),
                    new UsernamePasswordCredentials(API_KEY, ""));

            try (CloseableHttpClient httpclient = HttpClients.custom()
                    .setDefaultCredentialsProvider(credsProvider)
                    .setSSLSocketFactory(sslsf)
                    .build())
            {
                // Pre-emptive authentication: seed the auth cache with an already
                // initialised Basic scheme for the proxy, so the credentials go out
                // with the first request instead of after a challenge round trip.
                BasicScheme basicAuth = new BasicScheme();
                basicAuth.processChallenge(
                        new BasicHeader(HttpHeaders.PROXY_AUTHENTICATE,
                                        "Basic realm=\"Crawlera\""));
                AuthCache authCache = new BasicAuthCache();
                authCache.put(proxy, basicAuth);

                HttpClientContext ctx = HttpClientContext.create();
                ctx.setAuthCache(authCache);

                RequestConfig config = RequestConfig.custom()
                    .setProxy(proxy)
                    .build();

                HttpGet httpget = new HttpGet("/");
                httpget.setConfig(config);

                System.out.println("Executing request " + httpget.getRequestLine() +
                    " to " + target + " via " + proxy);

                try (CloseableHttpResponse response = httpclient.execute(
                    target, httpget, ctx))
                {
                    System.out.println("----------------------------------------");
                    System.out.println(response.getStatusLine());
                    System.out.println("----------------------------------------");
                    // A response is not required to carry an entity; only print the
                    // body when one is present, since EntityUtils.toString rejects null.
                    if (response.getEntity() != null) {
                        System.out.println(EntityUtils.toString(response.getEntity()));
                    }
                    // Null-safe; makes sure any remaining content is drained.
                    EntityUtils.consume(response.getEntity());
                }
            }
        }
    }

zyte-smartproxy-ca.crt should be added to the keystore, for instance with keytool:

keytool -import -file /path/to/zyte-smartproxy-ca.crt -storepass changeit -keystore $JAVA_HOME/jre/lib/security/cacerts -alias crawleracert

Warning

Some HTTP client libraries, including Apache HttpComponents Client and .NET, don’t send authentication headers by default. This can result in every request being sent twice (once without credentials and again after the proxy's authentication challenge), so pre-emptive authentication should be enabled where this is the case. In the above example we are making HTTPS requests to https://twitter.com through Smart Proxy Manager. It is assumed that the Smart Proxy Manager certificate has been installed, since the CONNECT method will be employed.