Zyte API proxy mode#

To use Zyte API as a proxy, use the api.zyte.com:8011 endpoint, with your API key and proxy headers:

using System;
using System.Net;
using System.Net.Http;

// Fetch https://toscrape.com through Zyte API proxy mode and print the body.
// The API key is sent as the proxy username; the password is left empty.
// The second WebProxy argument (true) bypasses the proxy for local addresses.
var proxy = new WebProxy("http://api.zyte.com:8011", true)
{
    Credentials = new NetworkCredential("YOUR_API_KEY", ""),
};

var httpClientHandler = new HttpClientHandler
{
    Proxy = proxy,
};

// `using` declarations dispose the client (and, via disposeHandler, its
// handler), the request and the response when the script ends.
using var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);
using var message = new HttpRequestMessage(HttpMethod.Get, "https://toscrape.com");

// Await the request instead of blocking on the synchronous Send overload.
using var response = await client.SendAsync(message);
var body = await response.Content.ReadAsStringAsync();

Console.WriteLine(body);
# Fetch https://toscrape.com through Zyte API proxy mode.
# -x is --proxy and -U is --proxy-user; the API key is the proxy username and
# the password after the colon is left empty.
curl --compressed \
    -x api.zyte.com:8011 \
    -U YOUR_API_KEY: \
    https://toscrape.com
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hc.client5.http.auth.AuthCache;
import org.apache.hc.client5.http.auth.AuthScope;
import org.apache.hc.client5.http.auth.CredentialsProvider;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.auth.BasicAuthCache;
import org.apache.hc.client5.http.impl.auth.BasicScheme;
import org.apache.hc.client5.http.impl.auth.CredentialsProviderBuilder;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.routing.DefaultProxyRoutePlanner;
import org.apache.hc.client5.http.protocol.HttpClientContext;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHost;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;

/**
 * Fetches https://toscrape.com through Zyte API proxy mode and prints the
 * response body to standard output.
 */
class Example {
  /**
   * Builds an HTTP client routed through the proxy at api.zyte.com:8011,
   * authenticating with the API key as the proxy username and an empty
   * password, then sends a single GET request.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if sending the request, reading the response or
   *     closing the client fails
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {

    HttpHost proxy = new HttpHost("api.zyte.com", 8011);
    DefaultProxyRoutePlanner routePlanner = new DefaultProxyRoutePlanner(proxy);
    CredentialsProvider credentialsProvider =
        CredentialsProviderBuilder.create()
            .add(new AuthScope(proxy), "YOUR_API_KEY", "".toCharArray())
            .build();

    // NOTE(review): this context is configured but never passed to
    // client.execute() below, so it does not affect the request as written.
    // Confirm whether it should be supplied to execute() or removed.
    AuthCache authCache = new BasicAuthCache();
    BasicScheme basicAuth = new BasicScheme();
    authCache.put(proxy, basicAuth);
    HttpClientContext context = HttpClientContext.create();
    context.setCredentialsProvider(credentialsProvider);
    context.setAuthCache(authCache);

    // try-with-resources closes the client, releasing its connections, even
    // when the request throws.
    try (CloseableHttpClient client =
        HttpClients.custom()
            .setRoutePlanner(routePlanner)
            .setDefaultCredentialsProvider(credentialsProvider)
            .build()) {

      HttpGet request = new HttpGet("https://toscrape.com");
      client.execute(
          request,
          response -> {
            HttpEntity entity = response.getEntity();
            String httpResponseBody = EntityUtils.toString(entity, StandardCharsets.UTF_8);
            System.out.println(httpResponseBody);
            return null;
          });
    }
  }
}
const axios = require('axios')

// Fetch https://toscrape.com through Zyte API proxy mode and print the body.
// The API key is the proxy username; the password is left empty.
const zyteProxy = {
  protocol: 'http',
  host: 'api.zyte.com',
  port: 8011,
  auth: { username: 'YOUR_API_KEY', password: '' }
}

async function main () {
  const response = await axios.get('https://toscrape.com', { proxy: zyteProxy })
  const httpResponseBody = response.data
  console.log(httpResponseBody)
}

main()
<?php

// Route requests made by this client through Zyte API proxy mode; the API key
// is the proxy username and the password is left empty.
$client = new GuzzleHttp\Client([
    'proxy' => 'http://YOUR_API_KEY:@api.zyte.com:8011',
]);

$response = $client->get('https://toscrape.com');
$http_response_body = (string) $response->getBody();
fwrite(STDOUT, $http_response_body);
import requests

# Send both http:// and https:// URLs through Zyte API proxy mode; the API key
# is the proxy username and the password is left empty.
zyte_proxy_url = "http://YOUR_API_KEY:@api.zyte.com:8011"
proxy_by_scheme = dict.fromkeys(("http", "https"), zyte_proxy_url)

response = requests.get("https://toscrape.com", proxies=proxy_by_scheme)
http_response_body: bytes = response.content
print(http_response_body.decode())
# frozen_string_literal: true

require 'net/http'

# Fetch the page through Zyte API proxy mode and print the response body.
target = URI('https://toscrape.com/')

# Positional arguments: host, port, proxy host, proxy port, proxy user
# (the API key) and proxy password (empty).
session = Net::HTTP.new(target.host, target.port, 'api.zyte.com', '8011', 'YOUR_API_KEY', '')
session.use_ssl = true

reply = session.start { |conn| conn.request(Net::HTTP::Get.new(target)) }

puts reply.body

When using scrapy-zyte-smartproxy, set the ZYTE_SMARTPROXY_URL setting to "http://api.zyte.com:8011" and the ZYTE_SMARTPROXY_APIKEY setting to your API key for Zyte API.

Then you can continue using Scrapy as usual and all requests will be proxied through Zyte API automatically.

from scrapy import Spider


class ToScrapeSpider(Spider):
    """Spider whose only start URL is https://toscrape.com."""

    name = "toscrape_com"
    start_urls = ["https://toscrape.com"]

    def parse(self, response):
        """Print the text of the received response."""
        page_text = response.text
        print(page_text)

Key differences#

The proxy mode makes it easier to migrate existing code that uses a proxy service. However, the proxy mode and the HTTP API have some key differences:

Feature

HTTP API

Proxy mode

Parameter definition

Request body

Request headers

Browser HTML

Yes

Yes (new!)

Screenshots

Yes

No

Browser actions

Yes

No

Network capture

Yes

No

Disable JS on browser

Yes

No

Automatic extraction

Yes

No

Server-managed sessions

Yes

No

Echo data

Yes

No

Overhead

Some

Minimum

Cookie definition

Multi-domain

Target domain only

Overhead#

When using HTTP requests, the HTTP API introduces some overhead in responses due mainly to the base64-encoding of httpResponseBody, increasing network traffic and latency, and requiring base64-decoding on your end.

In contrast, with proxy mode the only overhead you get is some additional response headers.

Request headers#

The following headers allow changing how a request is sent through Zyte API in proxy mode.

Zyte-Browser-Html#

Sets browserHtml.

This is not compatible with Zyte-Disable-Follow-Redirect.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

# Fetch https://toscrape.com through Zyte API proxy mode with the
# Zyte-Browser-Html request header set to true.
# -x is --proxy and -U is --proxy-user; the API key is the proxy username and
# the password after the colon is left empty.
curl --compressed \
    -x api.zyte.com:8011 \
    -U YOUR_API_KEY: \
    --header "Zyte-Browser-Html: true" \
    https://toscrape.com
using System;
using System.Net;
using System.Net.Http;

// Fetch https://toscrape.com through Zyte API proxy mode with the
// Zyte-Browser-Html request header set, and print the body.
// The API key is sent as the proxy username; the password is left empty.
// The second WebProxy argument (true) bypasses the proxy for local addresses.
var proxy = new WebProxy("http://api.zyte.com:8011", true)
{
    Credentials = new NetworkCredential("YOUR_API_KEY", ""),
};

var httpClientHandler = new HttpClientHandler
{
    Proxy = proxy,
};

// `using` declarations dispose the client (and, via disposeHandler, its
// handler), the request and the response when the script ends.
using var client = new HttpClient(handler: httpClientHandler, disposeHandler: true);
client.DefaultRequestHeaders.Add("Zyte-Browser-Html", "true");
using var message = new HttpRequestMessage(HttpMethod.Get, "https://toscrape.com");

// Await the request instead of blocking on the synchronous Send overload.
using var response = await client.SendAsync(message);
var body = await response.Content.ReadAsStringAsync();

Console.WriteLine(body);
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hc.client5.http.auth.AuthCache;
import org.apache.hc.client5.http.auth.AuthScope;
import org.apache.hc.client5.http.auth.CredentialsProvider;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.auth.BasicAuthCache;
import org.apache.hc.client5.http.impl.auth.BasicScheme;
import org.apache.hc.client5.http.impl.auth.CredentialsProviderBuilder;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.routing.DefaultProxyRoutePlanner;
import org.apache.hc.client5.http.protocol.HttpClientContext;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHost;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;

/**
 * Fetches https://toscrape.com through Zyte API proxy mode with the
 * Zyte-Browser-Html request header set, and prints the response body to
 * standard output.
 */
class Example {
  /**
   * Builds an HTTP client routed through the proxy at api.zyte.com:8011,
   * authenticating with the API key as the proxy username and an empty
   * password, then sends a single GET request carrying the
   * Zyte-Browser-Html header.
   *
   * @param args command-line arguments (unused)
   * @throws IOException if sending the request, reading the response or
   *     closing the client fails
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {

    HttpHost proxy = new HttpHost("api.zyte.com", 8011);
    DefaultProxyRoutePlanner routePlanner = new DefaultProxyRoutePlanner(proxy);
    CredentialsProvider credentialsProvider =
        CredentialsProviderBuilder.create()
            .add(new AuthScope(proxy), "YOUR_API_KEY", "".toCharArray())
            .build();

    // NOTE(review): this context is configured but never passed to
    // client.execute() below, so it does not affect the request as written.
    // Confirm whether it should be supplied to execute() or removed.
    AuthCache authCache = new BasicAuthCache();
    BasicScheme basicAuth = new BasicScheme();
    authCache.put(proxy, basicAuth);
    HttpClientContext context = HttpClientContext.create();
    context.setCredentialsProvider(credentialsProvider);
    context.setAuthCache(authCache);

    // try-with-resources closes the client, releasing its connections, even
    // when the request throws.
    try (CloseableHttpClient client =
        HttpClients.custom()
            .setRoutePlanner(routePlanner)
            .setDefaultCredentialsProvider(credentialsProvider)
            .build()) {

      HttpGet request = new HttpGet("https://toscrape.com");
      request.setHeader("Zyte-Browser-Html", "true");
      client.execute(
          request,
          response -> {
            HttpEntity entity = response.getEntity();
            String httpResponseBody = EntityUtils.toString(entity, StandardCharsets.UTF_8);
            System.out.println(httpResponseBody);
            return null;
          });
    }
  }
}
const axios = require('axios')

// Fetch https://toscrape.com through Zyte API proxy mode with the
// Zyte-Browser-Html request header set, and print the body.
// The API key is the proxy username; the password is left empty.
const requestConfig = {
  headers: { 'Zyte-Browser-Html': 'true' },
  proxy: {
    protocol: 'http',
    host: 'api.zyte.com',
    port: 8011,
    auth: { username: 'YOUR_API_KEY', password: '' }
  }
}

async function main () {
  const response = await axios.get('https://toscrape.com', requestConfig)
  const httpResponseBody = response.data
  console.log(httpResponseBody)
}

main()
<?php

// Route requests made by this client through Zyte API proxy mode; the API key
// is the proxy username and the password is left empty.
$client = new GuzzleHttp\Client([
    'proxy' => 'http://YOUR_API_KEY:@api.zyte.com:8011',
]);

// Send the Zyte-Browser-Html request header with this request.
$request_options = [
    'headers' => ['Zyte-Browser-Html' => 'true'],
];
$response = $client->get('https://toscrape.com', $request_options);
$http_response_body = (string) $response->getBody();
fwrite(STDOUT, $http_response_body);
import requests

# Send both http:// and https:// URLs through Zyte API proxy mode; the API key
# is the proxy username and the password is left empty.
zyte_proxy_url = "http://YOUR_API_KEY:@api.zyte.com:8011"
proxy_by_scheme = dict.fromkeys(("http", "https"), zyte_proxy_url)

# Send the Zyte-Browser-Html request header with this request.
zyte_headers = {"Zyte-Browser-Html": "true"}

response = requests.get(
    "https://toscrape.com", headers=zyte_headers, proxies=proxy_by_scheme
)
http_response_body: bytes = response.content
print(http_response_body.decode())
# frozen_string_literal: true

require 'net/http'

# Fetch the page through Zyte API proxy mode with the Zyte-Browser-Html
# request header set, and print the response body.
target = URI('https://toscrape.com/')

# Positional arguments: host, port, proxy host, proxy port, proxy user
# (the API key) and proxy password (empty).
session = Net::HTTP.new(target.host, target.port, 'api.zyte.com', '8011', 'YOUR_API_KEY', '')
session.use_ssl = true

get_request = Net::HTTP::Get.new(target)
get_request['Zyte-Browser-Html'] = 'true'

reply = session.start { |conn| conn.request(get_request) }

puts reply.body
from scrapy import Request, Spider


class ToScrapeSpider(Spider):
    """Spider that requests https://toscrape.com with Zyte-Browser-Html set."""

    name = "toscrape_com"

    def start_requests(self):
        """Yield the single initial request, carrying the Zyte-Browser-Html header."""
        zyte_headers = {"Zyte-Browser-Html": "true"}
        initial_request = Request("https://toscrape.com", headers=zyte_headers)
        yield initial_request

    def parse(self, response):
        """Print the text of the received response."""
        page_text = response.text
        print(page_text)

Output (first 5 lines):

<!DOCTYPE html><html lang="en"><head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
        <title>Scraping Sandbox</title>
        <link href="./css/bootstrap.min.css" rel="stylesheet">
        <link href="./css/main.css" rel="stylesheet">

Zyte-Client#

May be used to report to Zyte the software being used to access Zyte API.

It should be formatted with the syntax of the User-Agent header, e.g. curl/1.2.3.

Zyte-Device#

Sets device emulation.

Zyte-Disable-Follow-Redirect#

When set to true, disables redirect following, which is enabled by default.

Zyte-Geolocation#

Sets a geolocation.

Zyte-IPType#

Sets ipType.

Zyte-JobId#

Sets the ID of the Scrapy Cloud job that is sending the request.

scrapy-zyte-smartproxy sets this header automatically when used from a Scrapy Cloud job.

Zyte-Override-Headers#

Zyte API automatically sends some request headers for ban avoidance.

Custom headers from your request will override most automatic headers, but not these:

Accept
Accept-Encoding
User-Agent

To override any of these 3 headers, set Zyte-Override-Headers to a comma-separated list of names of headers to override, e.g. Zyte-Override-Headers: Accept,Accept-Encoding.

Warning

Overriding headers can break Zyte API ban avoidance.

Zyte-Session-ID#

Sets session.id for a client-managed session.

Invalid request headers#

The following headers are not allowed, and any request with one or more of them will result in an HTTP 400 response:

Client-IP
Cluster-Client-IP
Forwarded-For
True-Client-IP
Via
X-Client-IP
X-Forwarded
X-Forwarded-For
X-Forwarded-Host
X-Host
X-Original-URL
X-Originating-IP
X-ProxyUser-IO
X-ProxyUser-IP
X-Remote-Addr
X-Remote-IP

Response headers#

Responses include some headers injected by Zyte API.

Note that the response body of unsuccessful responses is always the actual JSON response from the HTTP API that provides error details.

Zyte-Error#

The presence of this header indicates that the response was unsuccessful.

Its value should be ignored and not relied upon, as it is an internal error ID subject to change at any time.

Zyte-Error-Title#

A short summary of the problem type. Written in English and readable for engineers, usually not suited for non-technical stakeholders, and not localized.

It matches the title JSON field of the error response.

Zyte-Error-Type#

A URI reference that uniquely identifies the problem type, only in the context of the provided API.

Contrary to the specification in RFC 7807, it is neither recommended to be dereferenceable and point to human-readable documentation, nor globally unique for the problem type.

It matches the type JSON field of the error response.

Zyte-Request-ID#

A unique identifier of the request.

When reporting an issue about the outcome of a request to us, please include the value of this response header when possible.

HTTPS proxy#

Tip

The main endpoint works for both HTTP and HTTPS URLs; you do not need an HTTPS proxy interface to access HTTPS URLs.

You can use the api.zyte.com:8014 endpoint for an HTTPS proxy interface, provided your tech stack supports HTTPS proxies and you have installed our CA certificate:

# Fetch https://toscrape.com through the HTTPS proxy interface on port 8014.
# -x is --proxy and -U is --proxy-user; the API key is the proxy username and
# the password after the colon is left empty.
curl --compressed \
    -x https://api.zyte.com:8014 \
    -U YOUR_API_KEY: \
    https://toscrape.com
// Fetch https://toscrape.com through the HTTPS proxy interface on port 8014
// and print the body. The API key is the proxy username; the password is
// left empty.
const { HttpsProxyAgent } = require('https-proxy-agent')

const zyteProxyUrl = 'https://YOUR_API_KEY:@api.zyte.com:8014'
const httpsAgent = new HttpsProxyAgent(zyteProxyUrl)

// An axios instance that uses the proxy agent for its HTTPS requests.
const axios = require('axios').create({ httpsAgent })

async function main () {
  const response = await axios.get('https://toscrape.com')
  const httpResponseBody = response.data
  console.log(httpResponseBody)
}

main()
import requests

# Send both http:// and https:// URLs through the HTTPS proxy interface on
# port 8014; the API key is the proxy username and the password is left empty.
zyte_proxy_url = "https://YOUR_API_KEY:@api.zyte.com:8014"
proxy_by_scheme = dict.fromkeys(("http", "https"), zyte_proxy_url)

response = requests.get("https://toscrape.com", proxies=proxy_by_scheme)
http_response_body: bytes = response.content
print(http_response_body.decode())

Use with browser automation tools#

The proxy mode is not optimized for use in combination with browser automation tools. Please consider using Zyte API’s browser automation features instead. See Migrating from browser automation to Zyte API.