Zyte API shared features#

Learn here about Zyte API features that you can use with both HTTP requests and browser automation: geolocation, cookies, redirection, response headers, and metadata.

Geolocation#

Set the geolocation key in your API request body to a supported ISO 3166-1 alpha-2 country code to channel your request through an IP address associated with the corresponding country.

Example

Note

Install and configure code example requirements to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "http://ip-api.com/json"},
    {"httpResponseBody", true},
    {"geolocation", "AU"}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);

var responseData = JsonDocument.Parse(httpResponseBody);
var countryCode = responseData.RootElement.GetProperty("countryCode").ToString();
input.json#
{"url": "http://ip-api.com/json", "httpResponseBody": true, "geolocation": "AU"}
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq .countryCode
input.jsonl#
{"url": "http://ip-api.com/json", "httpResponseBody": true, "geolocation": "AU"}
zyte-api input.jsonl 2> /dev/null \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq .countryCode
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url", "http://ip-api.com/json", "httpResponseBody", true, "geolocation", "AU");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String countryCode = data.get("countryCode").getAsString();
      }
    }
  }

  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

axios.post(
  'https://api.zyte.com/v1/extract',
  {
    url: 'http://ip-api.com/json',
    httpResponseBody: true,
    geolocation: 'AU'
  },
  {
    auth: { username: 'YOUR_API_KEY' }
  }
).then((response) => {
  const httpResponseBody = Buffer.from(
    response.data.httpResponseBody,
    'base64'
  )
  const data = JSON.parse(httpResponseBody)
  const countryCode = data.countryCode
})
<?php

$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => [
        'url' => 'http://ip-api.com/json',
        'httpResponseBody' => true,
        'geolocation' => 'AU',
    ],
]);
$api = json_decode($response->getBody());
$http_response_body = base64_decode($api->httpResponseBody);
$data = json_decode($http_response_body);
$country_code = $data->countryCode;
import json
from base64 import b64decode

import requests

api_response = requests.post(
    'https://api.zyte.com/v1/extract',
    auth=('YOUR_API_KEY', ''),
    json={
        'url': 'http://ip-api.com/json',
        'httpResponseBody': True,
        'geolocation': 'AU',
    },
)
http_response_body: bytes = b64decode(
    api_response.json()['httpResponseBody']
)
response_data = json.loads(http_response_body)
country_code = response_data['countryCode']
import asyncio
import json
from base64 import b64decode

from zyte_api.aio.client import AsyncClient

async def main():
    client = AsyncClient()
    api_response = await client.request_raw(
        {
            'url': 'http://ip-api.com/json',
            'httpResponseBody': True,
            'geolocation': 'AU',
        }
    )
    http_response_body: bytes = b64decode(
        api_response['httpResponseBody']
    )
    response_data = json.loads(http_response_body)
    country_code = response_data['countryCode']

asyncio.run(main())
import json

from scrapy import Request, Spider


class IPAPIComSpider(Spider):
    name = "ip_api_com"

    def start_requests(self):
        yield Request(
            "http://ip-api.com/json",
            meta={
                "zyte_api_automap": {
                    "geolocation": "AU",
                },
            },
        )

    def parse(self, response):
        response_data = json.loads(response.body)
        country_code = response_data["countryCode"]

Look up the geolocation key in the specification for the list of supported countries.

When the geolocation key is not specified, Zyte API aims to channel your request through a country that ensures a good response from the target website, meaning that the chosen country:

  • Does not cause unexpected locale changes in the response data, such as the wrong language, currency, date format, time zone, etc.

  • Does not cause your request to be banned.

Zyte API can use countries of origin beyond those supported by the geolocation key. For example, if you access a Turkish website, Zyte API may access the website from Türkiye as long as you do not specify otherwise through the geolocation key, even though geolocation does not support TR as a value at the moment.

Cookies#

Warning

Cookie support is currently experimental. Breaking API changes may be introduced at any moment.

Set the following keys within the experimental key in your API request body to handle cookies:

  • To include cookies in a request set the requestCookies key to an array of objects representing those cookies.

    Each cookie object requires name, value, and domain keys. For a complete list of supported keys, see the reference.

  • Set the responseCookies key to true to get website response cookies included in the matching key of your Zyte API response.

    responseCookies also works with browser features. For example, you can send a request to extract browser HTML and response cookies from a URL, and then use those cookies with the requestCookies key of requests that extract a response body.

Example

Note

Install and configure code example requirements to run the example below.

The following code example sends a cookie to httpbin.org and prints the cookies that httpbin.org reports to have received:

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "https://httpbin.org/cookies"},
    {"httpResponseBody", true},
    {
        "experimental",
        new Dictionary<string, object>()
        {
            {
                "requestCookies",
                new List<Dictionary<string, string>>()
                {
                    new Dictionary<string, string>()
                    {
                        {"name", "foo"},
                        {"value", "bar"},
                        {"domain", "httpbin.org"}
                    }
                }
            }
        }
    }
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
var result = System.Text.Encoding.UTF8.GetString(httpResponseBody);

Console.WriteLine(result);
input.json#
{
    "url": "https://httpbin.org/cookies",
    "httpResponseBody": true,
    "experimental": {
        "requestCookies": [
            {
                "name": "foo",
                "value": "bar",
                "domain": "httpbin.org"
            }
        ]
    }
}
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode
input.jsonl#
{"url":"https://httpbin.org/cookies","httpResponseBody":true,"experimental":{"requestCookies":[{"name":"foo","value":"bar","domain":"httpbin.org"}]}}
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, String> cookies =
        ImmutableMap.of("name", "foo", "value", "bar", "domain", "httpbin.org");
    Map<String, Object> experimental =
        ImmutableMap.of("requestCookies", Collections.singletonList(cookies));
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/cookies",
            "httpResponseBody",
            true,
            "experimental",
            experimental);
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        System.out.println(httpResponseBody);
      }
    }
  }

  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

axios.post(
  'https://api.zyte.com/v1/extract',
  {
    url: 'https://httpbin.org/cookies',
    httpResponseBody: true,
    experimental: {
      requestCookies: [
        {
          name: 'foo',
          value: 'bar',
          domain: 'httpbin.org'
        }
      ]
    }
  },
  {
    auth: { username: 'YOUR_API_KEY' }
  }
).then((response) => {
  const httpResponseBody = Buffer.from(
    response.data.httpResponseBody,
    'base64'
  )
  console.log(httpResponseBody.toString())
})
<?php

$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => [
        'url' => 'https://httpbin.org/cookies',
        'httpResponseBody' => true,
        'experimental' => [
            'requestCookies' => [
                [
                    'name' => 'foo',
                    'value' => 'bar',
                    'domain' => 'httpbin.org',
                ],
            ],
        ],
    ],
]);
$api = json_decode($response->getBody());
$http_response_body = base64_decode($api->httpResponseBody);
echo $http_response_body;
from base64 import b64decode

import requests

api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json={
        "url": "https://httpbin.org/cookies",
        "httpResponseBody": True,
        "experimental": {
            "requestCookies": [
                {
                    "name": "foo",
                    "value": "bar",
                    "domain": "httpbin.org",
                },
            ],
        },
    },
)
http_response_body = b64decode(api_response.json()["httpResponseBody"])
print(http_response_body.decode())
import asyncio
from base64 import b64decode

from zyte_api.aio.client import AsyncClient


async def main():
    client = AsyncClient()
    api_response = await client.request_raw(
        {
            "url": "https://httpbin.org/cookies",
            "httpResponseBody": True,
            "experimental": {
                "requestCookies": [
                    {
                        "name": "foo",
                        "value": "bar",
                        "domain": "httpbin.org",
                    },
                ],
            },
        }
    )
    http_response_body = b64decode(api_response["httpResponseBody"])
    print(http_response_body.decode())


asyncio.run(main())
from scrapy import Request, Spider


class QuotesToScrapeComSpider(Spider):
    name = "quotes_toscrape_com"

    def start_requests(self):
        yield Request(
            "https://httpbin.org/cookies",
            meta={
                "zyte_api_automap": {
                    "experimental": {
                        "requestCookies": [
                            {
                                "name": "foo",
                                "value": "bar",
                                "domain": "httpbin.org",
                            },
                        ],
                    },
                },
            },
        )

    def parse(self, response):
        print(response.text)

Output:

{
  "cookies": {
    "foo": "bar"
  }
}

Redirection#

Zyte API always follows HTTP redirections.

On browser requests, redirections triggered by HTML or JavaScript are also followed.

Response headers#

Set the httpResponseHeaders key in your API request body to true to extract response headers.

When you do, the Zyte API response includes an httpResponseHeaders key with the headers as an array of objects with name and value keys.

Example

Note

Install and configure code example requirements to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "https://toscrape.com"},
    {"httpResponseHeaders", true}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

var data = JsonDocument.Parse(body);
var headerEnumerator = data.RootElement.GetProperty("httpResponseHeaders").EnumerateArray();
var headers = new Dictionary<string, string>();
while (headerEnumerator.MoveNext())
{
    headers.Add(
        headerEnumerator.Current.GetProperty("name").ToString(),
        headerEnumerator.Current.GetProperty("value").ToString()
    );
}
input.json#
{"url": "https://toscrape.com", "httpResponseHeaders": true}
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq .httpResponseHeaders
input.jsonl#
{"url": "https://toscrape.com", "httpResponseHeaders": true}
zyte-api input.jsonl 2> /dev/null \
| jq .httpResponseHeaders
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url", "https://toscrape.com", "browserHtml", true, "httpResponseHeaders", true);
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        JsonArray httpResponseHeaders = jsonObject.get("httpResponseHeaders").getAsJsonArray();
      }
    }
  }

  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

axios.post(
  'https://api.zyte.com/v1/extract',
  {
    url: 'https://toscrape.com',
    httpResponseHeaders: true
  },
  {
    auth: { username: 'YOUR_API_KEY' }
  }
).then((response) => {
  const httpResponseHeaders = response.data.httpResponseHeaders
})
<?php

$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => [
        'url' => 'https://toscrape.com',
        'httpResponseHeaders' => true,
    ],
]);
$api = json_decode($response->getBody());
$http_response_headers = $api->httpResponseHeaders;
import requests

api_response = requests.post(
    'https://api.zyte.com/v1/extract',
    auth=('YOUR_API_KEY', ''),
    json={
        'url': 'https://toscrape.com',
        'httpResponseHeaders': True,
    },
)
http_response_headers = api_response.json()['httpResponseHeaders']
import asyncio

from zyte_api.aio.client import AsyncClient

async def main():
    client = AsyncClient()
    api_response = await client.request_raw(
        {
            'url': 'https://toscrape.com',
            'httpResponseHeaders': True,
        }
    )
    http_response_headers = api_response['httpResponseHeaders']

asyncio.run(main())
from scrapy import Request, Spider


class ToScrapeComSpider(Spider):
    name = "toscrape_com"

    def start_requests(self):
        yield Request(
            "https://toscrape.com",
            meta={
                "zyte_api_automap": {
                    "httpResponseBody": False,
                    "httpResponseHeaders": True,
                },
            },
        )

    def parse(self, response):
        headers = response.headers

Note

In transparent mode, httpResponseHeaders is sent by default for httpResponseBody requests, but sending it explicitly is still recommended, as future versions of scrapy-zyte-api may stop sending it by default.

Metadata#

Set the echoData key in your API request body to an arbitrary value, to get that value verbatim in the API response.

When sending multiple requests in parallel, this can be useful, for example, to keep track of the original request order.

Example

Note

Install and configure code example requirements to run the example below.

using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

var inputData = new List<List<object>>()
{
    new List<object>(){"https://toscrape.com", 1},
    new List<object>(){"https://books.toscrape.com", 2},
    new List<object>(){"https://quotes.toscrape.com", 3},
};
var output = new List<HttpResponseMessage>();

var handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All,
    MaxConnectionsPerServer = 15
};
var client = new HttpClient(handler);

var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var responseTasks = new List<Task<HttpResponseMessage>>();
foreach (var entry in inputData)
{
    var input = new Dictionary<string, object>(){
        {"url", entry[0]},
        {"browserHtml", true},
        {"echoData", entry[1]}
    };
    var inputJson = JsonSerializer.Serialize(input);
    var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
    var responseTask = client.PostAsync("https://api.zyte.com/v1/extract", content);
    responseTasks.Add(responseTask);
}

while (responseTasks.Any())
{
    var responseTask = await Task.WhenAny(responseTasks);
    responseTasks.Remove(responseTask);
    var response = await responseTask;
    output.Add(response);
}
input.jsonl#
{"url": "https://toscrape.com", "browserHtml": true, "echoData": 1}
{"url": "https://books.toscrape.com", "browserHtml": true, "echoData": 2}
{"url": "https://quotes.toscrape.com", "browserHtml": true, "echoData": 3}
cat input.jsonl \
| xargs -P 15 -d\\n -n 1 \
bash -c "
    curl \
        --user $ZYTE_API_KEY: \
        --header 'Content-Type: application/json' \
        --data \"\$0\" \
        --compressed \
        https://api.zyte.com/v1/extract \
    | jq .echoData \
    | awk '{print \$1}' \
    >> output.jsonl
"
input.jsonl#
{"url": "https://toscrape.com", "browserHtml": true, "echoData": 1}
{"url": "https://books.toscrape.com", "browserHtml": true, "echoData": 2}
{"url": "https://quotes.toscrape.com", "browserHtml": true, "echoData": 3}
zyte-api --n-conn 15 input.jsonl -o output.jsonl
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.hc.client5.http.async.methods.SimpleHttpRequest;
import org.apache.hc.client5.http.async.methods.SimpleHttpResponse;
import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient;
import org.apache.hc.client5.http.impl.async.HttpAsyncClients;
import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManager;
import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder;
import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder;
import org.apache.hc.core5.concurrent.FutureCallback;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.nio.ssl.TlsStrategy;
import org.apache.hc.core5.reactor.ssl.TlsDetails;

class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws ExecutionException, InterruptedException, IOException, ParseException {

    Object[][] input = {
      {"https://toscrape.com", 1},
      {"https://bookstoscrape.com", 2},
      {"https://quotes.toscrape.com", 3}
    };
    List<Future> futures = new ArrayList<Future>();
    List<String> output = new ArrayList<String>();

    int concurrency = 15;

    // https://issues.apache.org/jira/browse/HTTPCLIENT-2219
    final TlsStrategy tlsStrategy =
        ClientTlsStrategyBuilder.create()
            .useSystemProperties()
            .setTlsDetailsFactory(
                sslEngine ->
                    new TlsDetails(sslEngine.getSession(), sslEngine.getApplicationProtocol()))
            .build();

    PoolingAsyncClientConnectionManager connectionManager =
        PoolingAsyncClientConnectionManagerBuilder.create().setTlsStrategy(tlsStrategy).build();
    connectionManager.setMaxTotal(concurrency);
    connectionManager.setDefaultMaxPerRoute(concurrency);

    CloseableHttpAsyncClient client =
        HttpAsyncClients.custom().setConnectionManager(connectionManager).build();
    try {
      client.start();
      for (int i = 0; i < input.length; i++) {
        Map<String, Object> parameters =
            ImmutableMap.of("url", input[i][0], "browserHtml", true, "echoData", input[i][1]);
        String requestBody = new Gson().toJson(parameters);

        SimpleHttpRequest request =
            new SimpleHttpRequest("POST", "https://api.zyte.com/v1/extract");
        request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
        request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
        request.setBody(requestBody, ContentType.APPLICATION_JSON);

        final Future<SimpleHttpResponse> future =
            client.execute(
                request,
                new FutureCallback<SimpleHttpResponse>() {
                  public void completed(final SimpleHttpResponse response) {
                    String apiResponse = response.getBodyText();
                    output.add(apiResponse);
                  }

                  public void failed(final Exception ex) {}

                  public void cancelled() {}
                });
        futures.add(future);
      }
      for (int i = 0; i < futures.size(); i++) {
        futures.get(i).get();
      }
    } finally {
      client.close();
    }
  }

  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const { ConcurrencyManager } = require('axios-concurrency')
const axios = require('axios')

const urls = [
  ['https://toscrape.com', 1],
  ['https://books.toscrape.com', 2],
  ['https://quotes.toscrape.com', 3]
]
const output = []

const client = axios.create()
ConcurrencyManager(client, 15)

Promise.all(
  urls.map((input) =>
    client.post(
      'https://api.zyte.com/v1/extract',
      { url: input[0], browserHtml: true, echoData: input[1] },
      {
        auth: { username: 'YOUR_API_KEY' }
      }
    ).then((response) => output.push(response.data))
  )
)
<?php

$input = [
    ['https://toscrape.com', 1],
    ['https://books.toscrape.com', 2],
    ['https://quotes.toscrape.com', 3],
];
$output = [];
$promises = [];

$client = new GuzzleHttp\Client();

foreach ($input as $url_and_index) {
    $options = [
        'auth' => ['YOUR_API_KEY', ''],
        'headers' => ['Accept-Encoding' => 'gzip'],
        'json' => [
            'url' => $url_and_index[0],
            'browserHtml' => true,
            'echoData' => $url_and_index[1],
        ],
    ];
    $request = new \GuzzleHttp\Psr7\Request('POST', 'https://api.zyte.com/v1/extract');
    global $promises;
    $promises[] = $client->sendAsync($request, $options)->then(function ($response) {
        global $output;
        $output[] = json_decode($response->getBody());
    });
}

foreach ($promises as $promise) {
    $promise->wait();
}
import asyncio

import aiohttp

input_data = [
    ('https://toscrape.com', 1),
    ('https://books.toscrape.com', 2),
    ('https://quotes.toscrape.com', 3),
]
output = []


async def extract(client, url, index):
    response = await client.post(
        'https://api.zyte.com/v1/extract',
        json={'url': url, 'browserHtml': True, 'echoData': index},
        auth=aiohttp.BasicAuth('YOUR_API_KEY'),
    )
    output.append(await response.json())


async def main():
    connector = aiohttp.TCPConnector(limit_per_host=15)
    async with aiohttp.ClientSession(connector=connector) as client:
        await asyncio.gather(
            *[extract(client, url, index) for url, index in input_data]
        )

asyncio.run(main())
import asyncio

from zyte_api.aio.client import AsyncClient, create_session

input_data = [
    ('https://toscrape.com', 1),
    ('https://books.toscrape.com', 2),
    ('https://quotes.toscrape.com', 3),
]
output = []


async def main():
    connection_count = 15
    client = AsyncClient(n_conn=connection_count)
    requests = [
        {'url': url, 'browserHtml': True, 'echoData': index}
        for url, index in input_data
    ]
    async with create_session(connection_count) as session:
        responses = client.request_parallel_as_completed(
            requests,
            session=session,
        )
        for response in responses:
            output.append(await response)

asyncio.run(main())
from scrapy import Request, Spider

input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]


class ToScrapeSpider(Spider):
    name = "toscrape_com"

    custom_settings = {
        "CONCURRENT_REQUESTS": 15,
        "CONCURRENT_REQUESTS_PER_DOMAIN": 15,
    }

    def start_requests(self):
        for url, index in input_data:
            yield Request(
                url,
                meta={
                    "zyte_api_automap": {
                        "browserHtml": True,
                        "echoData": index,
                    },
                },
            )

    def parse(self, response):
        yield {
            "index": response.raw_api_response["echoData"],
            "html": response.text,
        }

Alternatively, you can use Scrapy’s Request.cb_kwargs directly for a similar purpose:


    def start_requests(self):
        for url, index in input_data:
            yield Request(
                url,
                cb_kwargs={"index": index},
                meta={
                    "zyte_api_automap": {
                        "browserHtml": True,
                    },
                },
            )

    def parse(self, response, index):
        yield {
            "index": index,
            "html": response.text,
        }

There is another metadata field that you can set and get verbatim on the API response: jobId. When running your requests from a Zyte Scrapy Cloud job, this field is meant to indicate the corresponding job ID. scrapy-zyte-api fills this field automatically.