Zyte API HTTP requests#

To send HTTP requests through Zyte API, enable httpResponseBody.

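For HTTP requests, Zyte API also supports the features described in the sections below, such as setting the request method, the request body and the request headers.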

Response body#

To extract a response body, set the httpResponseBody key in your API request body to true.

The httpResponseBody key of the response JSON object is the response body. It is Base64-encoded, as it supports binary response bodies.

Example

Note

Install and configure code example requirements to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// The handler transparently decompresses compressed API responses.
// Disposing the client at the end of the script also disposes the handler.
using var client = new HttpClient(new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
});

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Zyte API parameters, sent as the JSON body of the POST request.
var input = new Dictionary<string, object>(){
    {"url", "https://toscrape.com"},
    {"httpResponseBody", true}
};
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so it is disposed when the script ends.
using var data = JsonDocument.Parse(body);
// The response body is Base64-encoded inside the JSON response; decode it into raw bytes.
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
input.json#
{"url": "https://toscrape.com", "httpResponseBody": true}
# POST input.json to Zyte API, extract the Base64 body with jq, and save the decoded bytes.
curl \
    -u YOUR_API_KEY: \
    -H 'Content-Type: application/json' \
    -d @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq -r .httpResponseBody \
| base64 --decode \
> output.html
input.jsonl#
{"url": "https://toscrape.com", "httpResponseBody": true}
# Run the request in input.jsonl, discard stderr, and save the Base64-decoded body.
zyte-api input.jsonl 2> /dev/null \
| jq -r .httpResponseBody \
| base64 --decode \
> output.html
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Requests https://toscrape.com through Zyte API and Base64-decodes the returned response body.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Sends the extract request and decodes the {@code httpResponseBody} field of the response.
   *
   * @param args unused command-line arguments
   * @throws IOException if the HTTP exchange fails
   * @throws ParseException if the response entity cannot be parsed
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of("url", "https://toscrape.com", "httpResponseBody", true);
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone encodes the text as
    // ISO-8859-1, which would corrupt any non-Latin-1 character in the JSON body.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The response body is Base64-encoded inside the JSON response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
      }
    }
  }

  /**
   * Builds the HTTP Basic authorization header: the API key is the user name and the password
   * is empty.
   *
   * @return the value for the {@code Authorization} request header
   */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Zyte API parameters, sent as the JSON body of the POST request.
const requestBody = {
  url: 'https://toscrape.com',
  httpResponseBody: true
}
// The API key is the Basic auth user name; no password is set.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestBody, requestConfig)
  .then(({ data }) => {
    // The response body is Base64-encoded; decode it into a Buffer of raw bytes.
    const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
  })
<?php

// Zyte API parameters, sent as the JSON body of the POST request.
$parameters = [
    'url' => 'https://toscrape.com',
    'httpResponseBody' => true,
];
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
]);
// The response body is Base64-encoded inside the JSON response.
$data = json_decode($response->getBody());
$encoded_body = $data->httpResponseBody;
$http_response_body = base64_decode($encoded_body);
from base64 import b64decode

import requests

# Zyte API parameters, sent as the JSON body of the POST request.
request_params = {
    'url': 'https://toscrape.com',
    'httpResponseBody': True,
}
api_response = requests.post(
    'https://api.zyte.com/v1/extract',
    auth=('YOUR_API_KEY', ''),
    json=request_params,
)
# The response body is Base64-encoded inside the JSON response.
encoded_body = api_response.json()['httpResponseBody']
http_response_body: bytes = b64decode(encoded_body)
import asyncio
from base64 import b64decode

from zyte_api.aio.client import AsyncClient

async def main():
    """Request https://toscrape.com through Zyte API and decode the response body."""
    request_params = {
        'url': 'https://toscrape.com',
        'httpResponseBody': True,
    }
    client = AsyncClient()
    api_response = await client.request_raw(request_params)
    # The response body is Base64-encoded inside the API response.
    encoded_body = api_response['httpResponseBody']
    http_response_body: bytes = b64decode(encoded_body)

asyncio.run(main())

In transparent mode, when you target a text resource (e.g. HTML, JSON), regular Scrapy requests work out of the box:

from scrapy import Spider


class ToScrapeSpider(Spider):
    """Spider that requests https://toscrape.com with a regular Scrapy request."""

    name = "toscrape_com"
    start_urls = ["https://toscrape.com"]

    def parse(self, response):
        """Read the response body as decoded text."""
        decoded_text = response.text
        http_response_text: str = decoded_text

While regular Scrapy requests also work for binary responses at the moment, they may stop working in future versions of scrapy-zyte-api, so passing httpResponseBody is recommended when targeting binary resources:

from scrapy import Request, Spider


class ToScrapeSpider(Spider):
    """Spider that explicitly asks for httpResponseBody through request metadata."""

    name = "toscrape_com"

    def start_requests(self):
        """Yield one request whose meta carries the Zyte API parameters."""
        zyte_api_params = {"httpResponseBody": True}
        request_meta = {"zyte_api_automap": zyte_api_params}
        yield Request("https://toscrape.com", meta=request_meta)

    def parse(self, response):
        """Read the response body as raw bytes."""
        raw_body = response.body
        http_response_body: bytes = raw_body

Decoding HTML#

HTML extracted as a response body needs to be decoded.

HTML content can be encoded with one of many character encodings, and you must determine the character encoding used so that you can decode that HTML content accordingly.

The best way to determine the encoding of HTML content is to follow the encoding sniffing algorithm defined in the HTML standard.

In addition to the HTML content, the HTML encoding sniffing algorithm takes into account any character encoding provided in the optional charset parameter of media types declared in the Content-Type response header, so make sure you get the response headers in addition to the response body if you are following the HTML encoding sniffing algorithm.

Example

Note

Install and configure code example requirements to run the example below.

Use file to find the character encoding of a previously-downloaded response based solely on its body (i.e. not following the HTML encoding sniffing algorithm).

# Ask file for the character encoding it detects in the contents of output.html.
file --mime-encoding output.html

Use content-type-parser, html-encoding-sniffer and whatwg-encoding:

const contentTypeParser = require('content-type-parser')
const htmlEncodingSniffer = require('html-encoding-sniffer')
const whatwgEncoding = require('whatwg-encoding')

// …

// Response headers arrive as a list of { name, value } objects. Header names are
// case-insensitive, so compare them in lowercase.
const httpResponseHeaders = response.data.httpResponseHeaders
let contentTypeCharset
httpResponseHeaders.forEach(function (item) {
  if (item.name.toLowerCase() === 'content-type') {
    // A malformed Content-Type value may not parse; in that case leave the
    // charset undefined so that the sniffer relies on the body alone.
    const contentType = contentTypeParser(item.value)
    if (contentType) {
      contentTypeCharset = contentType.get('charset')
    }
  }
})
// Decode the Base64 body into bytes, sniff its encoding (taking the
// Content-Type charset into account), and decode the bytes into a string.
const httpResponseBody = Buffer.from(response.data.httpResponseBody, 'base64')
const encoding = htmlEncodingSniffer(httpResponseBody, {
  transportLayerEncodingLabel: contentTypeCharset
})
const html = whatwgEncoding.decode(httpResponseBody, encoding)

web-poet provides a response wrapper that automatically decodes the response body following an encoding sniffing algorithm similar to the one defined in the HTML standard.

Provided that you have extracted a response with both body and headers, and you have Base64-decoded the response body, you can decode the HTML bytes as follows:

from web_poet import HttpResponse

# …

# Turn the list of {'name': ..., 'value': ...} header objects into (name, value) pairs.
headers = tuple(
    (header['name'], header['value'])
    for header in http_response_headers
)
response = HttpResponse(
    url='https://example.com',
    body=http_response_body,
    status=200,
    headers=headers,
)
# Reading .text decodes the body bytes into a string.
html = response.text

In transparent mode, regular Scrapy requests targeting HTML resources decode them by default. See Response body.

Request method#

Response body extraction uses a GET request by default.

Use the httpRequestMethod key in your API request body to switch the request method to a different value: POST, PUT, DELETE, OPTIONS, TRACE, PATCH.

Example

Note

Install and configure code example requirements to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// The handler transparently decompresses compressed API responses.
// Disposing the client at the end of the script also disposes the handler.
using var client = new HttpClient(new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
});

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// httpRequestMethod selects the method of the request sent to the target URL.
var input = new Dictionary<string, object>(){
    {"url", "https://httpbin.org/anything"},
    {"httpResponseBody", true},
    {"httpRequestMethod", "POST"}
};
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so both documents are disposed when the script ends.
using var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);

// The decoded body is itself JSON; read its "method" field.
using var responseData = JsonDocument.Parse(httpResponseBody);
var method = responseData.RootElement.GetProperty("method").ToString();
input.json#
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST"}
# POST input.json to Zyte API, decode the Base64 body, and print its "method" field.
curl \
    -u YOUR_API_KEY: \
    -H 'Content-Type: application/json' \
    -d @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq -r .httpResponseBody \
| base64 --decode \
| jq .method
input.jsonl#
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST"}
# Run the request in input.jsonl, discard stderr, and print the "method" field of the decoded body.
zyte-api input.jsonl 2> /dev/null \
| jq -r .httpResponseBody \
| base64 --decode \
| jq .method
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends a POST request to https://httpbin.org/anything through Zyte API and reads the
 * {@code method} field of the JSON that comes back.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Sends the extract request, decodes the response body and reads its {@code method} field.
   *
   * @param args unused command-line arguments
   * @throws IOException if the HTTP exchange fails
   * @throws ParseException if the response entity cannot be parsed
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "httpRequestMethod",
            "POST");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone encodes the text as
    // ISO-8859-1, which would corrupt any non-Latin-1 character in the JSON body.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The response body is Base64-encoded inside the JSON response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        // The decoded body is itself JSON; read its "method" field.
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String method = data.get("method").getAsString();
      }
    }
  }

  /**
   * Builds the HTTP Basic authorization header: the API key is the user name and the password
   * is empty.
   *
   * @return the value for the {@code Authorization} request header
   */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Zyte API parameters; httpRequestMethod selects the method used for the target URL.
const requestBody = {
  url: 'https://httpbin.org/anything',
  httpResponseBody: true,
  httpRequestMethod: 'POST'
}
// The API key is the Basic auth user name; no password is set.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestBody, requestConfig)
  .then(({ data }) => {
    // The response body is Base64-encoded; decode it into a Buffer of raw bytes.
    const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
    // The decoded body is itself JSON; read its "method" field.
    const parsedBody = JSON.parse(httpResponseBody)
    const method = parsedBody.method
  })
<?php

// Zyte API parameters; httpRequestMethod selects the method used for the target URL.
$parameters = [
    'url' => 'https://httpbin.org/anything',
    'httpResponseBody' => true,
    'httpRequestMethod' => 'POST',
];
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
]);
// The response body is Base64-encoded inside the JSON response.
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
// The decoded body is itself JSON; read its "method" field.
$parsed_body = json_decode($http_response_body);
$method = $parsed_body->method;
import json
from base64 import b64decode

import requests

# Zyte API parameters; httpRequestMethod selects the method used for the target URL.
request_params = {
    'url': 'https://httpbin.org/anything',
    'httpResponseBody': True,
    'httpRequestMethod': 'POST',
}
api_response = requests.post(
    'https://api.zyte.com/v1/extract',
    auth=('YOUR_API_KEY', ''),
    json=request_params,
)
# The response body is Base64-encoded inside the JSON response.
encoded_body = api_response.json()['httpResponseBody']
http_response_body = b64decode(encoded_body)
# The decoded body is itself JSON; read its "method" field.
parsed_body = json.loads(http_response_body)
method = parsed_body['method']
import asyncio
import json
from base64 import b64decode

from zyte_api.aio.client import AsyncClient

async def main():
    """Send a POST request to https://httpbin.org/anything through Zyte API and read the "method" field of the JSON response body."""
    request_params = {
        'url': 'https://httpbin.org/anything',
        'httpResponseBody': True,
        'httpRequestMethod': 'POST',
    }
    client = AsyncClient()
    api_response = await client.request_raw(request_params)
    # The response body is Base64-encoded inside the API response.
    encoded_body = api_response['httpResponseBody']
    http_response_body: bytes = b64decode(encoded_body)
    parsed_body = json.loads(http_response_body)
    method = parsed_body['method']

asyncio.run(main())
import json

from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Spider that sends a single POST request to https://httpbin.org/anything."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield the POST request."""
        post_request = Request("https://httpbin.org/anything", method="POST")
        yield post_request

    def parse(self, response):
        """Parse the JSON response text and read its "method" field."""
        parsed_body = json.loads(response.text)
        method = parsed_body["method"]

The HEAD and CONNECT request methods are not supported.

Request body#

If you use a request method other than GET, you may also need to set a request body.

You can use any of the following JSON keys in the body of your extract request to set a request body:

  • httpRequestText, for UTF-8-encoded text.

  • httpRequestBody, for anything else. It supports binary data as well, so the value must be Base64-encoded.

httpRequestText example

Note

Install and configure code example requirements to run the example below.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// The handler transparently decompresses compressed API responses.
// Disposing the client at the end of the script also disposes the handler.
using var client = new HttpClient(new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
});

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// httpRequestText carries the request body as plain text.
var input = new Dictionary<string, object>(){
    {"url", "https://httpbin.org/anything"},
    {"httpResponseBody", true},
    {"httpRequestMethod", "POST"},
    {"httpRequestText", "{\"foo\": \"bar\"}"}
};
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so both documents are disposed when the script ends.
using var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);

// The decoded body is itself JSON; read its "data" field.
using var responseData = JsonDocument.Parse(httpResponseBody);
var requestBody = responseData.RootElement.GetProperty("data").ToString();

Console.WriteLine(requestBody);
input.json#
{
    "url": "https://httpbin.org/anything",
    "httpResponseBody": true,
    "httpRequestMethod": "POST",
    "httpRequestText": "{\"foo\": \"bar\"}"
}
# POST input.json to Zyte API, decode the Base64 body, and print its "data" field.
curl \
    -u YOUR_API_KEY: \
    -H 'Content-Type: application/json' \
    -d @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq -r .httpResponseBody \
| base64 --decode \
| jq -r .data
input.jsonl#
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST", "httpRequestText": "{\"foo\": \"bar\"}"}
# Discard stderr, as the other zyte-api examples do, so only the "data" field is printed.
zyte-api input.jsonl 2> /dev/null \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output .data
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends a POST request with a text body to https://httpbin.org/anything through Zyte API and
 * prints the {@code data} field of the JSON that comes back.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Sends the extract request, decodes the response body and prints its {@code data} field.
   *
   * @param args unused command-line arguments
   * @throws IOException if the HTTP exchange fails
   * @throws ParseException if the response entity cannot be parsed
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "httpRequestMethod",
            "POST",
            "httpRequestText",
            "{\"foo\": \"bar\"}");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone encodes the text as
    // ISO-8859-1, which would corrupt any non-Latin-1 character in httpRequestText.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The response body is Base64-encoded inside the JSON response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        // The decoded body is itself JSON; read its "data" field.
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String body = data.get("data").getAsString();
        System.out.println(body);
      }
    }
  }

  /**
   * Builds the HTTP Basic authorization header: the API key is the user name and the password
   * is empty.
   *
   * @return the value for the {@code Authorization} request header
   */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Zyte API parameters; httpRequestText carries the request body as plain text.
const requestBody = {
  url: 'https://httpbin.org/anything',
  httpResponseBody: true,
  httpRequestMethod: 'POST',
  httpRequestText: '{"foo": "bar"}'
}
// The API key is the Basic auth user name; no password is set.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestBody, requestConfig)
  .then(({ data }) => {
    // The response body is Base64-encoded; decode it into a Buffer of raw bytes.
    const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
    // The decoded body is itself JSON; print its "data" field.
    const parsedBody = JSON.parse(httpResponseBody)
    const body = parsedBody.data
    console.log(body)
  })
<?php

// Zyte API parameters; httpRequestText carries the request body as plain text.
$parameters = [
    'url' => 'https://httpbin.org/anything',
    'httpResponseBody' => true,
    'httpRequestMethod' => 'POST',
    'httpRequestText' => '{"foo": "bar"}',
];
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
]);
// The response body is Base64-encoded inside the JSON response.
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
// The decoded body is itself JSON; print its "data" field.
$parsed_body = json_decode($http_response_body);
$body = $parsed_body->data;
echo $body.PHP_EOL;
import json
from base64 import b64decode

import requests

# Zyte API parameters; httpRequestText carries the request body as plain text.
request_params = {
    "url": "https://httpbin.org/anything",
    "httpResponseBody": True,
    "httpRequestMethod": "POST",
    "httpRequestText": '{"foo": "bar"}',
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_params,
)
# The response body is Base64-encoded inside the JSON response.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
# The decoded body is itself JSON; print its "data" field.
parsed_body = json.loads(http_response_body)
body: str = parsed_body["data"]
print(body)
import asyncio
import json
from base64 import b64decode

from zyte_api.aio.client import AsyncClient


async def main():
    """Send a POST request with a text body through Zyte API and print the "data" field of the JSON response body."""
    request_params = {
        "url": "https://httpbin.org/anything",
        "httpResponseBody": True,
        "httpRequestMethod": "POST",
        "httpRequestText": '{"foo": "bar"}',
    }
    client = AsyncClient()
    api_response = await client.request_raw(request_params)
    # The response body is Base64-encoded inside the API response.
    encoded_body = api_response["httpResponseBody"]
    http_response_body: bytes = b64decode(encoded_body)
    parsed_body = json.loads(http_response_body)
    body: str = parsed_body["data"]
    print(body)


asyncio.run(main())
import json

from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Spider that sends a POST request with a text body to https://httpbin.org/anything."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield the POST request with its text body."""
        post_request = Request(
            "https://httpbin.org/anything",
            method="POST",
            body='{"foo": "bar"}',
        )
        yield post_request

    def parse(self, response):
        """Parse the JSON response body and print its "data" field."""
        parsed_body = json.loads(response.body)
        body = parsed_body["data"]
        print(body)

Output:

{"foo": "bar"}
httpRequestBody example

Note

Install and configure code example requirements to run the example below.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// The handler transparently decompresses compressed API responses.
// Disposing the client at the end of the script also disposes the handler.
using var client = new HttpClient(new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
});

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// httpRequestBody carries the request body Base64-encoded.
var input = new Dictionary<string, object>(){
    {"url", "https://httpbin.org/anything"},
    {"httpResponseBody", true},
    {"httpRequestMethod", "POST"},
    {"httpRequestBody", "Zm9v"}
};
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so both documents are disposed when the script ends.
using var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);

// The decoded body is itself JSON; read its "data" field.
using var responseData = JsonDocument.Parse(httpResponseBody);
var requestBody = responseData.RootElement.GetProperty("data").ToString();

Console.WriteLine(requestBody);
input.json#
{
    "url": "https://httpbin.org/anything",
    "httpResponseBody": true,
    "httpRequestMethod": "POST",
    "httpRequestBody": "Zm9v"
}
# POST input.json to Zyte API, decode the Base64 body, and print its "data" field.
curl \
    -u YOUR_API_KEY: \
    -H 'Content-Type: application/json' \
    -d @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
| jq -r .httpResponseBody \
| base64 --decode \
| jq -r .data
input.jsonl#
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST", "httpRequestBody": "Zm9v"}
# Discard stderr, as the other zyte-api examples do, so only the "data" field is printed.
zyte-api input.jsonl 2> /dev/null \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output .data
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends a POST request with a Base64-encoded body to https://httpbin.org/anything through Zyte
 * API and prints the {@code data} field of the JSON that comes back.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Sends the extract request, decodes the response body and prints its {@code data} field.
   *
   * @param args unused command-line arguments
   * @throws IOException if the HTTP exchange fails
   * @throws ParseException if the response entity cannot be parsed
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "httpRequestMethod",
            "POST",
            "httpRequestBody",
            "Zm9v");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone encodes the text as
    // ISO-8859-1, which would corrupt any non-Latin-1 character in the JSON body.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The response body is Base64-encoded inside the JSON response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        // The decoded body is itself JSON; read its "data" field.
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String body = data.get("data").getAsString();
        System.out.println(body);
      }
    }
  }

  /**
   * Builds the HTTP Basic authorization header: the API key is the user name and the password
   * is empty.
   *
   * @return the value for the {@code Authorization} request header
   */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Zyte API parameters; httpRequestBody carries the request body Base64-encoded.
const requestBody = {
  url: 'https://httpbin.org/anything',
  httpResponseBody: true,
  httpRequestMethod: 'POST',
  httpRequestBody: 'Zm9v'
}
// The API key is the Basic auth user name; no password is set.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestBody, requestConfig)
  .then(({ data }) => {
    // The response body is Base64-encoded; decode it into a Buffer of raw bytes.
    const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
    // The decoded body is itself JSON; print its "data" field.
    const parsedBody = JSON.parse(httpResponseBody)
    const body = parsedBody.data
    console.log(body)
  })
<?php

// Zyte API parameters; httpRequestBody carries the request body Base64-encoded.
$parameters = [
    'url' => 'https://httpbin.org/anything',
    'httpResponseBody' => true,
    'httpRequestMethod' => 'POST',
    'httpRequestBody' => 'Zm9v',
];
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
]);
// The response body is Base64-encoded inside the JSON response.
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
// The decoded body is itself JSON; print its "data" field.
$parsed_body = json_decode($http_response_body);
$body = $parsed_body->data;
echo $body.PHP_EOL;
import json
from base64 import b64decode

import requests

# Zyte API parameters; httpRequestBody carries the request body Base64-encoded.
request_params = {
    "url": "https://httpbin.org/anything",
    "httpResponseBody": True,
    "httpRequestMethod": "POST",
    "httpRequestBody": "Zm9v",
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_params,
)
# The response body is Base64-encoded inside the JSON response.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
# The decoded body is itself JSON; print its "data" field.
parsed_body = json.loads(http_response_body)
body: str = parsed_body["data"]
print(body)
import asyncio
import json
from base64 import b64decode

from zyte_api.aio.client import AsyncClient


async def main():
    """Send a POST request with a Base64-encoded body through Zyte API and print the "data" field of the JSON response body."""
    request_params = {
        "url": "https://httpbin.org/anything",
        "httpResponseBody": True,
        "httpRequestMethod": "POST",
        "httpRequestBody": "Zm9v",
    }
    client = AsyncClient()
    api_response = await client.request_raw(request_params)
    # The response body is Base64-encoded inside the API response.
    encoded_body = api_response["httpResponseBody"]
    http_response_body: bytes = b64decode(encoded_body)
    parsed_body = json.loads(http_response_body)
    body: str = parsed_body["data"]
    print(body)


asyncio.run(main())
import json

from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Spider that sends a POST request with a bytes body to https://httpbin.org/anything."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield the POST request with its bytes body."""
        post_request = Request(
            "https://httpbin.org/anything",
            method="POST",
            body=b"foo",
        )
        yield post_request

    def parse(self, response):
        """Parse the JSON response body and print its "data" field."""
        parsed_body = json.loads(response.body)
        body = parsed_body["data"]
        print(body)

Output:

foo

Request headers#

Set the customHttpRequestHeaders key in your API request body to an array of objects with name and value keys representing headers to include in your request.

Example

Note

Install and configure code example requirements to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Have the handler decompress API responses using any supported method.
var handler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
};
var client = new HttpClient(handler);

// HTTP Basic credentials: the API key followed by ":" (nothing after the colon),
// Base64-encoded.
var apiKey = "YOUR_API_KEY";
var credentials = System.Convert.ToBase64String(
    Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":")
);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + credentials);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Zyte API parameters: fetch the URL and send one custom request header.
var acceptLanguage = new Dictionary<string, object>
{
    ["name"] = "Accept-Language",
    ["value"] = "fa"
};
var input = new Dictionary<string, object>
{
    ["url"] = "https://httpbin.org/anything",
    ["httpResponseBody"] = true,
    ["customHttpRequestHeaders"] = new List<Dictionary<string, object>> { acceptLanguage }
};
var content = new StringContent(
    JsonSerializer.Serialize(input),
    Encoding.UTF8,
    "application/json"
);

var response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

// httpResponseBody is Base64-encoded; decode it to get the raw response bytes.
var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);

// The decoded body is itself JSON; copy its "headers" object into a dictionary.
var responseData = JsonDocument.Parse(httpResponseBody);
var headers = new Dictionary<string, string>();
foreach (var header in responseData.RootElement.GetProperty("headers").EnumerateObject())
{
    headers.Add(header.Name, header.Value.ToString());
}
input.json#
{
   "url": "https://httpbin.org/anything",
   "httpResponseBody": true,
   "customHttpRequestHeaders": [
      {
         "name": "Accept-Language",
         "value": "fa"
      }
   ]
}
# Send the parameters in input.json to Zyte API, Base64-decode the returned
# httpResponseBody, and print the "headers" field of the decoded JSON document.
curl -u YOUR_API_KEY: \
   -H 'Content-Type: application/json' \
   -d @input.json \
   --compressed \
   https://api.zyte.com/v1/extract |
   jq -r .httpResponseBody |
   base64 --decode |
   jq .headers
input.jsonl#
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "customHttpRequestHeaders": [{"name": "Accept-Language", "value": "fa"}]}
# Run zyte-api on input.jsonl (stderr discarded), Base64-decode the returned
# httpResponseBody, and print the "headers" field of the decoded JSON document.
zyte-api input.jsonl 2> /dev/null |
   jq -r .httpResponseBody |
   base64 --decode |
   jq .headers
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends a Zyte API request that includes a custom {@code Accept-Language} request header, then
 * reads the {@code headers} object from the JSON document found in the decoded response body.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Builds the request parameters, posts them to Zyte API, and decodes the response.
   *
   * @param args unused command-line arguments
   * @throws IOException if sending the request or reading the response fails
   * @throws ParseException if the response entity cannot be converted to a string
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> customHttpRequestHeader =
        ImmutableMap.of("name", "Accept-Language", "value", "fa");
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "customHttpRequestHeaders",
            Collections.singletonList(customHttpRequestHeader));
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Give the entity an explicit JSON content type so the body is encoded accordingly instead
    // of relying on the default text content type of the single-argument constructor.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        JsonObject headers = data.get("headers").getAsJsonObject();
      }
    }
  }

  /**
   * Builds the HTTP Basic {@code Authorization} header value from the API key.
   *
   * @return {@code "Basic "} followed by the Base64 encoding of the API key and a trailing colon
   */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Encode with an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Zyte API parameters: fetch the URL and send one custom request header.
const zyteParameters = {
  url: 'https://httpbin.org/anything',
  httpResponseBody: true,
  customHttpRequestHeaders: [{ name: 'Accept-Language', value: 'fa' }]
}

// The API key is sent as the HTTP Basic auth user name.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', zyteParameters, requestConfig)
  .then(({ data }) => {
    // httpResponseBody is Base64-encoded; decode it, then parse it as JSON.
    const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
    const { headers } = JSON.parse(httpResponseBody)
  })
<?php

// Zyte API parameters: fetch the URL and send one custom request header.
$parameters = [
    'url' => 'https://httpbin.org/anything',
    'httpResponseBody' => true,
    'customHttpRequestHeaders' => [
        ['name' => 'Accept-Language', 'value' => 'fa'],
    ],
];

$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
]);

// httpResponseBody is Base64-encoded; decode it, parse the JSON document it
// contains, and read that document's "headers" property.
$api = json_decode($response->getBody());
$data = json_decode(base64_decode($api->httpResponseBody));
$headers = $data->headers;
import json
from base64 import b64decode

import requests

# Zyte API parameters: fetch the URL and send one custom request header.
request_params = {
    "url": "https://httpbin.org/anything",
    "httpResponseBody": True,
    "customHttpRequestHeaders": [
        {"name": "Accept-Language", "value": "fa"},
    ],
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_params,
)

# httpResponseBody arrives Base64-encoded; decode it, parse the JSON document it
# contains, and read that document's "headers" field.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
headers = json.loads(http_response_body)["headers"]
import asyncio
import json
from base64 import b64decode

from zyte_api.aio.client import AsyncClient

async def main():
    """Fetch the target URL through Zyte API with a custom Accept-Language
    header and read the "headers" field of the JSON document in the response."""
    query = {
        "url": "https://httpbin.org/anything",
        "httpResponseBody": True,
        "customHttpRequestHeaders": [
            {"name": "Accept-Language", "value": "fa"},
        ],
    }
    client = AsyncClient()
    api_response = await client.request_raw(query)

    # httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
    encoded_body = api_response["httpResponseBody"]
    http_response_body: bytes = b64decode(encoded_body)
    headers = json.loads(http_response_body)["headers"]

asyncio.run(main())
import json

from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Spider that sends one request with a custom Accept-Language header and
    reads the "headers" field of the JSON document it receives."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield a single request carrying the custom Accept-Language header."""
        custom_headers = {"Accept-Language": "fa"}
        yield Request("https://httpbin.org/anything", headers=custom_headers)

    def parse(self, response):
        """Parse the response text as JSON and read its "headers" field."""
        payload = json.loads(response.text)
        headers = payload["headers"]

Zyte API sends some headers automatically. In case of conflict, your custom headers will usually override Zyte API headers. However, Zyte API may silently override or drop some of your custom headers to reduce the chance of your request being banned. For example, you can never set custom Cookie or User-Agent headers.

Tip

To set cookies, see Cookies.

If you set multiple headers with the same name, only the last header value will be sent. To overcome this limitation, join the header values with a comma into a single header value. For example, replace "customHttpRequestHeaders": [{"name": "foo", "value": "bar"}, {"name": "foo", "value": "baz"}] with "customHttpRequestHeaders": [{"name": "foo", "value": "bar,baz"}].