Zyte API HTTP requests#
To send HTTP requests through Zyte API without browser rendering, set the httpResponseBody request field to true, and read the Base64-encoded response body from the httpResponseBody response field.
Example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
HttpClientHandler handler = new HttpClientHandler()
{
AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");
var input = new Dictionary<string, object>(){
{"url", "https://toscrape.com"},
{"httpResponseBody", true}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();
var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
{"url": "https://toscrape.com", "httpResponseBody": true}
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode \
> output.html
{
"url": "https://toscrape.com",
"httpResponseBody": true
}
curl \
--user YOUR_API_KEY: \
--header 'Content-Type: application/json' \
--data @input.json \
--compressed \
https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
> output.html
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
/**
 * Sends an HTTP (non-browser) request for https://toscrape.com through Zyte API and decodes the
 * Base64-encoded {@code httpResponseBody} field of the API response.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the extraction request and decodes the returned body.
   *
   * @param args unused
   * @throws InterruptedException declared for signature compatibility
   * @throws IOException if the HTTP exchange with Zyte API fails
   * @throws ParseException if the response entity cannot be converted to a string
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of("url", "https://toscrape.com", "httpResponseBody", true);
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Give the entity an explicit content type: StringEntity(String) alone would encode the
    // payload as text/plain ISO-8859-1, contradicting the UTF-8 JSON Content-Type header.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      HttpEntity entity = response.getEntity();
      String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
      JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
      String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
      byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
      String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
    }
  }

  /** Builds the HTTP Basic {@code Authorization} value: the API key as user, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use a fixed charset so the encoded credentials do not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
axios.post(
'https://api.zyte.com/v1/extract',
{
url: 'https://toscrape.com',
httpResponseBody: true
},
{
auth: { username: 'YOUR_API_KEY' }
}
).then((response) => {
const httpResponseBody = Buffer.from(
response.data.httpResponseBody,
'base64'
)
})
<?php
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
'auth' => ['YOUR_API_KEY', ''],
'headers' => ['Accept-Encoding' => 'gzip'],
'json' => [
'url' => 'https://toscrape.com',
'httpResponseBody' => true,
],
]);
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
With the proxy mode, you always get a response body.
curl \
--proxy api.zyte.com:8011 \
--proxy-user YOUR_API_KEY: \
--compressed \
https://toscrape.com \
> output.html
from base64 import b64decode
import requests
api_response = requests.post(
"https://api.zyte.com/v1/extract",
auth=("YOUR_API_KEY", ""),
json={
"url": "https://toscrape.com",
"httpResponseBody": True,
},
)
http_response_body: bytes = b64decode(api_response.json()["httpResponseBody"])
import asyncio
from base64 import b64decode
from zyte_api import AsyncZyteAPI
async def main():
    """Fetch https://toscrape.com through Zyte API and print the decoded response body."""
    client = AsyncZyteAPI()
    api_response = await client.get(
        {
            "url": "https://toscrape.com",
            "httpResponseBody": True,
        }
    )
    # The API returns the body Base64-encoded; decode to bytes, then to text for printing.
    http_response_body = b64decode(api_response["httpResponseBody"]).decode()
    print(http_response_body)
asyncio.run(main())
In transparent mode, when you target a text resource (e.g. HTML, JSON), regular Scrapy requests work out of the box:
from scrapy import Spider
class ToScrapeSpider(Spider):
    """Spider that requests https://toscrape.com and reads the response as text."""

    name = "toscrape_com"
    start_urls = ["https://toscrape.com"]

    def parse(self, response):
        """Read the decoded text of the response."""
        http_response_text: str = response.text
While regular Scrapy requests also work for binary responses at the moment, they may stop working in future versions of scrapy-zyte-api, so passing httpResponseBody is recommended when targeting binary resources:
from scrapy import Request, Spider
class ToScrapeSpider(Spider):
    """Spider that explicitly asks Zyte API for the raw HTTP response body."""

    name = "toscrape_com"

    def start_requests(self):
        """Yield a request whose meta sets httpResponseBody via zyte_api_automap."""
        yield Request(
            "https://toscrape.com",
            meta={
                "zyte_api_automap": {
                    "httpResponseBody": True,
                },
            },
        )

    def parse(self, response):
        """Read the raw response bytes."""
        http_response_body: bytes = response.body
Output (first 5 lines):
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Scraping Sandbox</title>
For HTTP requests, Zyte API also supports:
Geolocation, IP type, cookies, sessions, response headers, and metadata.
Tip
HTTP responses do not reflect HTML content rendered by a web browser that executes JavaScript code. To get browser HTML, use a browser request. See also HTML and browser HTML.
Request method#
HTTP requests use the GET HTTP method by default. Use the httpRequestMethod field to set a different HTTP method.
Tip
When using POST, PUT or similar, you probably also want to set a request body.
Example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
HttpClientHandler handler = new HttpClientHandler()
{
AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");
var input = new Dictionary<string, object>(){
{"url", "https://httpbin.org/anything"},
{"httpResponseBody", true},
{"httpRequestMethod", "POST"}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();
var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
var responseData = JsonDocument.Parse(httpResponseBody);
var method = responseData.RootElement.GetProperty("method").ToString();
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST"}
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq .method
{
"url": "https://httpbin.org/anything",
"httpResponseBody": true,
"httpRequestMethod": "POST"
}
curl \
--user YOUR_API_KEY: \
--header 'Content-Type: application/json' \
--data @input.json \
--compressed \
https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq .method
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
/**
 * Sends a POST request to https://httpbin.org/anything through Zyte API by setting {@code
 * httpRequestMethod}, then reads the {@code method} field of the JSON body returned by the target.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the extraction request, decodes the returned body and extracts the echoed method.
   *
   * @param args unused
   * @throws InterruptedException declared for signature compatibility
   * @throws IOException if the HTTP exchange with Zyte API fails
   * @throws ParseException if the response entity cannot be converted to a string
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "httpRequestMethod",
            "POST");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Give the entity an explicit content type: StringEntity(String) alone would encode the
    // payload as text/plain ISO-8859-1, contradicting the UTF-8 JSON Content-Type header.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      HttpEntity entity = response.getEntity();
      String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
      JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
      String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
      byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
      String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
      // The decoded body is itself JSON produced by the target URL.
      JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
      String method = data.get("method").getAsString();
    }
  }

  /** Builds the HTTP Basic {@code Authorization} value: the API key as user, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use a fixed charset so the encoded credentials do not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
axios.post(
'https://api.zyte.com/v1/extract',
{
url: 'https://httpbin.org/anything',
httpResponseBody: true,
httpRequestMethod: 'POST'
},
{
auth: { username: 'YOUR_API_KEY' }
}
).then((response) => {
const httpResponseBody = Buffer.from(
response.data.httpResponseBody,
'base64'
)
const method = JSON.parse(httpResponseBody).method
})
<?php
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
'auth' => ['YOUR_API_KEY', ''],
'headers' => ['Accept-Encoding' => 'gzip'],
'json' => [
'url' => 'https://httpbin.org/anything',
'httpResponseBody' => true,
'httpRequestMethod' => 'POST',
],
]);
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
$method = json_decode($http_response_body)->method;
With the proxy mode, the request method from your requests is used automatically.
curl \
--proxy api.zyte.com:8011 \
--proxy-user YOUR_API_KEY: \
--compressed \
-X POST \
https://httpbin.org/anything \
| jq .method
import json
from base64 import b64decode
import requests
api_response = requests.post(
"https://api.zyte.com/v1/extract",
auth=("YOUR_API_KEY", ""),
json={
"url": "https://httpbin.org/anything",
"httpResponseBody": True,
"httpRequestMethod": "POST",
},
)
http_response_body = b64decode(api_response.json()["httpResponseBody"])
method = json.loads(http_response_body)["method"]
import asyncio
import json
from base64 import b64decode
from zyte_api import AsyncZyteAPI
async def main():
    """POST to https://httpbin.org/anything through Zyte API and print the echoed method."""
    client = AsyncZyteAPI()
    api_response = await client.get(
        {
            "url": "https://httpbin.org/anything",
            "httpResponseBody": True,
            "httpRequestMethod": "POST",
        }
    )
    # The API returns the body Base64-encoded; the decoded bytes are a JSON document.
    http_response_body: bytes = b64decode(api_response["httpResponseBody"])
    method = json.loads(http_response_body)["method"]
    print(method)
asyncio.run(main())
import json
from scrapy import Request, Spider
class HTTPBinOrgSpider(Spider):
    """Spider that sends a POST request to https://httpbin.org/anything."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield a single request that uses the POST method."""
        yield Request(
            "https://httpbin.org/anything",
            method="POST",
        )

    def parse(self, response):
        """Read the ``method`` field from the JSON response."""
        method = json.loads(response.text)["method"]
Output:
"POST"
Request body#
To include a body in your request, use one of the following fields:
httpRequestText, for UTF-8-encoded text.
httpRequestBody, for anything else. It supports binary data as well, so the value must be Base64-encoded.
httpRequestText
example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
HttpClientHandler handler = new HttpClientHandler()
{
AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");
var input = new Dictionary<string, object>(){
{"url", "https://httpbin.org/anything"},
{"httpResponseBody", true},
{"httpRequestMethod", "POST"},
{"httpRequestText", "{\"foo\": \"bar\"}"}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();
var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
var responseData = JsonDocument.Parse(httpResponseBody);
var requestBody = responseData.RootElement.GetProperty("data").ToString();
Console.WriteLine(requestBody);
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST", "httpRequestText": "{\"foo\": \"bar\"}"}
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output .data
{
"url": "https://httpbin.org/anything",
"httpResponseBody": true,
"httpRequestMethod": "POST",
"httpRequestText": "{\"foo\": \"bar\"}"
}
curl \
--user YOUR_API_KEY: \
--header 'Content-Type: application/json' \
--data @input.json \
--compressed \
https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output .data
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
/**
 * Sends a POST request with a UTF-8 text body ({@code httpRequestText}) to
 * https://httpbin.org/anything through Zyte API and prints the {@code data} field of the JSON body
 * returned by the target.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the extraction request, decodes the returned body and prints its {@code data} field.
   *
   * @param args unused
   * @throws InterruptedException declared for signature compatibility
   * @throws IOException if the HTTP exchange with Zyte API fails
   * @throws ParseException if the response entity cannot be converted to a string
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "httpRequestMethod",
            "POST",
            "httpRequestText",
            "{\"foo\": \"bar\"}");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Give the entity an explicit content type: StringEntity(String) alone would encode the
    // payload as text/plain ISO-8859-1, which would corrupt non-ASCII httpRequestText values.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      HttpEntity entity = response.getEntity();
      String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
      JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
      String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
      byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
      String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
      // The decoded body is itself JSON produced by the target URL.
      JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
      String body = data.get("data").getAsString();
      System.out.println(body);
    }
  }

  /** Builds the HTTP Basic {@code Authorization} value: the API key as user, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use a fixed charset so the encoded credentials do not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
axios.post(
'https://api.zyte.com/v1/extract',
{
url: 'https://httpbin.org/anything',
httpResponseBody: true,
httpRequestMethod: 'POST',
httpRequestText: '{"foo": "bar"}'
},
{
auth: { username: 'YOUR_API_KEY' }
}
).then((response) => {
const httpResponseBody = Buffer.from(
response.data.httpResponseBody,
'base64'
)
const body = JSON.parse(httpResponseBody).data
console.log(body)
})
<?php
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
'auth' => ['YOUR_API_KEY', ''],
'headers' => ['Accept-Encoding' => 'gzip'],
'json' => [
'url' => 'https://httpbin.org/anything',
'httpResponseBody' => true,
'httpRequestMethod' => 'POST',
'httpRequestText' => '{"foo": "bar"}',
],
]);
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
$body = json_decode($http_response_body)->data;
echo $body.PHP_EOL;
With the proxy mode, the request body from your requests is used automatically, be it plain text or binary.
curl \
--proxy api.zyte.com:8011 \
--proxy-user YOUR_API_KEY: \
--compressed \
-X POST \
-H "Content-Type: application/json" \
--data '{"foo": "bar"}' \
https://httpbin.org/anything \
| jq .data
import json
from base64 import b64decode
import requests
api_response = requests.post(
"https://api.zyte.com/v1/extract",
auth=("YOUR_API_KEY", ""),
json={
"url": "https://httpbin.org/anything",
"httpResponseBody": True,
"httpRequestMethod": "POST",
"httpRequestText": '{"foo": "bar"}',
},
)
http_response_body = b64decode(api_response.json()["httpResponseBody"])
body: str = json.loads(http_response_body)["data"]
print(body)
import asyncio
import json
from base64 import b64decode
from zyte_api import AsyncZyteAPI
async def main():
    """POST a text body to https://httpbin.org/anything through Zyte API and print the echo."""
    client = AsyncZyteAPI()
    api_response = await client.get(
        {
            "url": "https://httpbin.org/anything",
            "httpResponseBody": True,
            "httpRequestMethod": "POST",
            "httpRequestText": '{"foo": "bar"}',
        }
    )
    # The API returns the body Base64-encoded; the decoded bytes are a JSON document.
    http_response_body = b64decode(api_response["httpResponseBody"])
    body = json.loads(http_response_body)["data"]
    print(body)
asyncio.run(main())
import json
from scrapy import Request, Spider
class HTTPBinOrgSpider(Spider):
    """Spider that POSTs a text body to https://httpbin.org/anything."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield a single POST request carrying a JSON string as its body."""
        yield Request(
            "https://httpbin.org/anything",
            method="POST",
            body='{"foo": "bar"}',
        )

    def parse(self, response):
        """Print the ``data`` field of the JSON response."""
        body = json.loads(response.body)["data"]
        print(body)
Output:
{"foo": "bar"}
httpRequestBody
example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
HttpClientHandler handler = new HttpClientHandler()
{
AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");
var input = new Dictionary<string, object>(){
{"url", "https://httpbin.org/anything"},
{"httpResponseBody", true},
{"httpRequestMethod", "POST"},
{"httpRequestBody", "Zm9v"}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();
var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
var responseData = JsonDocument.Parse(httpResponseBody);
var requestBody = responseData.RootElement.GetProperty("data").ToString();
Console.WriteLine(requestBody);
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "httpRequestMethod": "POST", "httpRequestBody": "Zm9v"}
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output .data
{
"url": "https://httpbin.org/anything",
"httpResponseBody": true,
"httpRequestMethod": "POST",
"httpRequestBody": "Zm9v"
}
curl \
--user YOUR_API_KEY: \
--header 'Content-Type: application/json' \
--data @input.json \
--compressed \
https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output .data
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
/**
 * Sends a POST request with a Base64-encoded body ({@code httpRequestBody}) to
 * https://httpbin.org/anything through Zyte API and prints the {@code data} field of the JSON body
 * returned by the target.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the extraction request, decodes the returned body and prints its {@code data} field.
   *
   * @param args unused
   * @throws InterruptedException declared for signature compatibility
   * @throws IOException if the HTTP exchange with Zyte API fails
   * @throws ParseException if the response entity cannot be converted to a string
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "httpRequestMethod",
            "POST",
            "httpRequestBody",
            "Zm9v");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Give the entity an explicit content type: StringEntity(String) alone would encode the
    // payload as text/plain ISO-8859-1, contradicting the UTF-8 JSON Content-Type header.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      HttpEntity entity = response.getEntity();
      String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
      JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
      String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
      byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
      String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
      // The decoded body is itself JSON produced by the target URL.
      JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
      String body = data.get("data").getAsString();
      System.out.println(body);
    }
  }

  /** Builds the HTTP Basic {@code Authorization} value: the API key as user, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use a fixed charset so the encoded credentials do not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
axios.post(
'https://api.zyte.com/v1/extract',
{
url: 'https://httpbin.org/anything',
httpResponseBody: true,
httpRequestMethod: 'POST',
httpRequestBody: 'Zm9v'
},
{
auth: { username: 'YOUR_API_KEY' }
}
).then((response) => {
const httpResponseBody = Buffer.from(
response.data.httpResponseBody,
'base64'
)
const body = JSON.parse(httpResponseBody).data
console.log(body)
})
<?php
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
'auth' => ['YOUR_API_KEY', ''],
'headers' => ['Accept-Encoding' => 'gzip'],
'json' => [
'url' => 'https://httpbin.org/anything',
'httpResponseBody' => true,
'httpRequestMethod' => 'POST',
'httpRequestBody' => 'Zm9v',
],
]);
$data = json_decode($response->getBody());
$http_response_body = base64_decode($data->httpResponseBody);
$body = json_decode($http_response_body)->data;
echo $body.PHP_EOL;
With the proxy mode, the request body from your requests is used automatically, be it plain text or binary.
curl \
--proxy api.zyte.com:8011 \
--proxy-user YOUR_API_KEY: \
--compressed \
-X POST \
-H "Content-Type: application/octet-stream" \
--data foo \
https://httpbin.org/anything \
| jq .data
import json
from base64 import b64decode
import requests
api_response = requests.post(
"https://api.zyte.com/v1/extract",
auth=("YOUR_API_KEY", ""),
json={
"url": "https://httpbin.org/anything",
"httpResponseBody": True,
"httpRequestMethod": "POST",
"httpRequestBody": "Zm9v",
},
)
http_response_body = b64decode(api_response.json()["httpResponseBody"])
body: str = json.loads(http_response_body)["data"]
print(body)
import asyncio
import json
from base64 import b64decode
from zyte_api import AsyncZyteAPI
async def main():
    """POST a Base64-encoded body to https://httpbin.org/anything and print the echo."""
    client = AsyncZyteAPI()
    api_response = await client.get(
        {
            "url": "https://httpbin.org/anything",
            "httpResponseBody": True,
            "httpRequestMethod": "POST",
            "httpRequestBody": "Zm9v",
        }
    )
    # The API returns the body Base64-encoded; the decoded bytes are a JSON document.
    http_response_body: bytes = b64decode(api_response["httpResponseBody"])
    body = json.loads(http_response_body)["data"]
    print(body)
asyncio.run(main())
import json
from scrapy import Request, Spider
class HTTPBinOrgSpider(Spider):
    """Spider that POSTs a bytes body to https://httpbin.org/anything."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield a single POST request carrying a bytes body."""
        yield Request(
            "https://httpbin.org/anything",
            method="POST",
            body=b"foo",
        )

    def parse(self, response):
        """Print the ``data`` field of the JSON response."""
        body = json.loads(response.body)["data"]
        print(body)
Output:
foo
Request headers#
In HTTP requests, use customHttpRequestHeaders to set request headers. You can set any header except Cookie (see Cookies).
Tip
You can also set headers like Accept, Accept-Encoding, Accept-Language or User-Agent, but it is usually best to let Zyte API set those headers; it will use values consistent with the network stack and other request parameters (e.g. device, geolocation).
Example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
HttpClientHandler handler = new HttpClientHandler()
{
AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");
var input = new Dictionary<string, object>(){
{"url", "https://httpbin.org/anything"},
{"httpResponseBody", true},
{
"customHttpRequestHeaders",
new List<Dictionary<string, object>>()
{
new Dictionary<string, object>()
{
{"name", "Accept-Language"},
{"value", "fa"}
}
}
}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();
var data = JsonDocument.Parse(body);
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
var responseData = JsonDocument.Parse(httpResponseBody);
var headerEnumerator = responseData.RootElement.GetProperty("headers").EnumerateObject();
var headers = new Dictionary<string, string>();
while (headerEnumerator.MoveNext())
{
headers.Add(
headerEnumerator.Current.Name.ToString(),
headerEnumerator.Current.Value.ToString()
);
}
{"url": "https://httpbin.org/anything", "httpResponseBody": true, "customHttpRequestHeaders": [{"name": "Accept-Language", "value": "fa"}]}
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq .headers
{
"url": "https://httpbin.org/anything",
"httpResponseBody": true,
"customHttpRequestHeaders": [
{
"name": "Accept-Language",
"value": "fa"
}
]
}
curl \
--user YOUR_API_KEY: \
--header 'Content-Type: application/json' \
--data @input.json \
--compressed \
https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq .headers
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
/**
 * Sends a request to https://httpbin.org/anything through Zyte API with a custom {@code
 * Accept-Language} header ({@code customHttpRequestHeaders}) and reads the {@code headers} object
 * of the JSON body returned by the target.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the extraction request, decodes the returned body and extracts its {@code headers}.
   *
   * @param args unused
   * @throws InterruptedException declared for signature compatibility
   * @throws IOException if the HTTP exchange with Zyte API fails
   * @throws ParseException if the response entity cannot be converted to a string
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> customHttpRequestHeader =
        ImmutableMap.of("name", "Accept-Language", "value", "fa");
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/anything",
            "httpResponseBody",
            true,
            "customHttpRequestHeaders",
            Collections.singletonList(customHttpRequestHeader));
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Give the entity an explicit content type: StringEntity(String) alone would encode the
    // payload as text/plain ISO-8859-1, which would corrupt non-ASCII header values.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      HttpEntity entity = response.getEntity();
      String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
      JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
      String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
      byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
      String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
      // The decoded body is itself JSON produced by the target URL.
      JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
      JsonObject headers = data.get("headers").getAsJsonObject();
    }
  }

  /** Builds the HTTP Basic {@code Authorization} value: the API key as user, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use a fixed charset so the encoded credentials do not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
// Zyte API parameters: fetch the URL as a plain HTTP request and send a custom
// Accept-Language header to the target website.
const query = {
  url: 'https://httpbin.org/anything',
  httpResponseBody: true,
  customHttpRequestHeaders: [{ name: 'Accept-Language', value: 'fa' }]
}
const options = { auth: { username: 'YOUR_API_KEY' } }

axios.post('https://api.zyte.com/v1/extract', query, options).then(({ data }) => {
  // httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
  const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
  const headers = JSON.parse(httpResponseBody).headers
})
<?php
$client = new GuzzleHttp\Client();

// Zyte API parameters: fetch the URL as a plain HTTP request and send a custom
// Accept-Language header to the target website.
$parameters = [
    'url' => 'https://httpbin.org/anything',
    'httpResponseBody' => true,
    'customHttpRequestHeaders' => [
        ['name' => 'Accept-Language', 'value' => 'fa'],
    ],
];
$options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
];
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', $options);

// httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
$api = json_decode($response->getBody());
$http_response_body = base64_decode($api->httpResponseBody);
$data = json_decode($http_response_body);
$headers = $data->headers;
With the proxy mode, the request headers from your requests are used automatically.
# Request https://httpbin.org/anything through the api.zyte.com:8011 proxy with
# an Accept-Language header, and print the "headers" key of the JSON response.
curl \
--proxy api.zyte.com:8011 \
--proxy-user YOUR_API_KEY: \
--compressed \
-H "Accept-Language: fa" \
https://httpbin.org/anything \
| jq .headers
import json
from base64 import b64decode
import requests
# Zyte API parameters: fetch the URL as a plain HTTP request and send a custom
# Accept-Language header to the target website.
query = {
    "url": "https://httpbin.org/anything",
    "httpResponseBody": True,
    "customHttpRequestHeaders": [
        {"name": "Accept-Language", "value": "fa"},
    ],
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=query,
)
# httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
headers = json.loads(http_response_body)["headers"]
import asyncio
import json
from base64 import b64decode
from zyte_api import AsyncZyteAPI
async def main():
    """Request httpbin.org through Zyte API with a custom Accept-Language
    header and print the ``headers`` object of the JSON response body."""
    client = AsyncZyteAPI()
    api_response = await client.get(
        {
            "url": "https://httpbin.org/anything",
            "httpResponseBody": True,
            "customHttpRequestHeaders": [
                {
                    "name": "Accept-Language",
                    "value": "fa",
                },
            ],
        }
    )
    # httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
    http_response_body: bytes = b64decode(api_response["httpResponseBody"])
    headers = json.loads(http_response_body)["headers"]
    print(json.dumps(headers, indent=2))


asyncio.run(main())
import json
from scrapy import Request, Spider
class HTTPBinOrgSpider(Spider):
    """Spider that requests httpbin.org with a custom Accept-Language header."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield a single request carrying the custom header."""
        yield Request(
            "https://httpbin.org/anything",
            headers={"Accept-Language": "fa"},
        )

    def parse(self, response):
        """Read the ``headers`` object from the JSON response body."""
        headers = json.loads(response.text)["headers"]
Output (first 5 lines):
{
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "fa",
"Host": "httpbin.org",
Redirection#
HTTP requests follow HTTP redirection by default. Set
followRedirect to false
to disable it.
Note
Redirection works differently in browser requests.
Device emulation#
In HTTP requests, use device to set a type of device emulation,
either desktop
(default) or mobile
, to use for your request.
This option exists because some websites return different content depending on the type of device used to access them.
Note
In a request where you set device to mobile
, you
cannot use sessionContextParameters.actions.
Example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
// Let the handler decompress API responses automatically.
var handler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
};
var client = new HttpClient(handler);

// HTTP Basic authentication: the API key is the user name, with an empty password.
var apiKey = "YOUR_API_KEY";
var credentials = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(credentials);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Zyte API parameters: fetch the URL as a plain HTTP request, emulating a mobile device.
var input = new Dictionary<string, object>
{
    ["url"] = "https://httpbin.org/user-agent",
    ["httpResponseBody"] = true,
    ["device"] = "mobile"
};
var content = new StringContent(JsonSerializer.Serialize(input), Encoding.UTF8, "application/json");

var response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var data = JsonDocument.Parse(await response.Content.ReadAsByteArrayAsync());

// httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
var encodedBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var responseData = JsonDocument.Parse(System.Convert.FromBase64String(encodedBody));

// Print the value of the "user-agent" property of the decoded JSON object.
foreach (var property in responseData.RootElement.EnumerateObject())
{
    if (property.Name == "user-agent")
    {
        Console.WriteLine(property.Value.ToString());
    }
}
{"url": "https://httpbin.org/user-agent", "httpResponseBody": true, "device": "mobile"}
# Run the zyte-api client on input.jsonl, Base64-decode the httpResponseBody
# field of its output, and print the "user-agent" key of the decoded JSON.
zyte-api input.jsonl \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output '.["user-agent"]'
{
"url": "https://httpbin.org/user-agent",
"httpResponseBody": true,
"device": "mobile"
}
# Send the contents of input.json to Zyte API, Base64-decode the
# httpResponseBody field of the JSON response, and print the "user-agent" key
# of the decoded JSON.
curl \
--user YOUR_API_KEY: \
--header 'Content-Type: application/json' \
--data @input.json \
--compressed \
https://api.zyte.com/v1/extract \
| jq --raw-output .httpResponseBody \
| base64 --decode \
| jq --raw-output '.["user-agent"]'
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
/**
 * Sends an HTTP request to https://httpbin.org/user-agent through Zyte API with mobile device
 * emulation, and prints the {@code user-agent} value from the JSON document returned as the
 * response body.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Runs the example.
   *
   * @param args unused command-line arguments
   * @throws IOException if the HTTP exchange with Zyte API fails
   * @throws ParseException if the API response entity cannot be parsed
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url", "https://httpbin.org/user-agent", "httpResponseBody", true, "device", "mobile");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the JSON content type explicitly so the body is encoded as UTF-8 rather than with
    // the library's default charset, which would corrupt non-Latin-1 characters.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();

        // The response body is returned Base64-encoded in the httpResponseBody field.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);

        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String userAgent = data.get("user-agent").getAsString();
        System.out.println(userAgent);
      }
    }
  }

  /** Builds the HTTP Basic {@code Authorization} value: the API key as user, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Explicit charset: the no-argument getBytes() depends on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
// Zyte API parameters: fetch the URL as a plain HTTP request, emulating a
// mobile device.
const query = {
  url: 'https://httpbin.org/user-agent',
  httpResponseBody: true,
  device: 'mobile'
}
const options = { auth: { username: 'YOUR_API_KEY' } }

axios.post('https://api.zyte.com/v1/extract', query, options).then(({ data }) => {
  // httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
  const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
  const parsedBody = JSON.parse(httpResponseBody)
  console.log(parsedBody['user-agent'])
})
<?php
$client = new GuzzleHttp\Client();

// Zyte API parameters: fetch the URL as a plain HTTP request, emulating a
// mobile device.
$parameters = [
    'url' => 'https://httpbin.org/user-agent',
    'httpResponseBody' => true,
    'device' => 'mobile',
];
$options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $parameters,
];
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', $options);

// httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
$api = json_decode($response->getBody());
$http_response_body = base64_decode($api->httpResponseBody);
$data = json_decode($http_response_body);
echo $data->{'user-agent'}."\n";
With the proxy mode, use the Zyte-Device header.
# Request https://httpbin.org/user-agent through the api.zyte.com:8011 proxy
# with the Zyte-Device header set to mobile, and print the "user-agent" key of
# the JSON response.
curl \
--proxy api.zyte.com:8011 \
--proxy-user YOUR_API_KEY: \
--compressed \
-H "Zyte-Device: mobile" \
https://httpbin.org/user-agent \
| jq --raw-output '.["user-agent"]'
import json
from base64 import b64decode
import requests
# Zyte API parameters: fetch the URL as a plain HTTP request, emulating a
# mobile device.
query = {
    "url": "https://httpbin.org/user-agent",
    "httpResponseBody": True,
    "device": "mobile",
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=query,
)
# httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
user_agent = json.loads(http_response_body)["user-agent"]
print(user_agent)
import asyncio
import json
from base64 import b64decode
from zyte_api import AsyncZyteAPI
async def main():
    """Request httpbin.org through Zyte API with mobile device emulation and
    print the ``user-agent`` value of the JSON response body."""
    client = AsyncZyteAPI()
    api_response = await client.get(
        {
            "url": "https://httpbin.org/user-agent",
            "httpResponseBody": True,
            "device": "mobile",
        }
    )
    # httpResponseBody is Base64-encoded; decode it before parsing it as JSON.
    http_response_body: bytes = b64decode(api_response["httpResponseBody"])
    user_agent = json.loads(http_response_body)["user-agent"]
    print(user_agent)


asyncio.run(main())
import json
from scrapy import Request, Spider
class HTTPBinOrgSpider(Spider):
    """Spider that requests httpbin.org with the ``device`` Zyte API parameter
    set to ``mobile``."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield a single request that sets ``device`` through request metadata."""
        yield Request(
            "https://httpbin.org/user-agent",
            meta={
                "zyte_api_automap": {
                    "device": "mobile",
                }
            },
        )

    def parse(self, response):
        """Print the ``user-agent`` value of the JSON response body."""
        user_agent = json.loads(response.text)["user-agent"]
        print(user_agent)
Example output (may vary):
Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Mobile Safari/537.36
Decoding HTML#
HTML extracted as a response body needs to be decoded.
HTML content can be encoded with one of many character encodings, and you must determine the character encoding used so that you can decode that HTML content accordingly.
The best way to determine the encoding of HTML content is to follow the encoding sniffing algorithm defined in the HTML standard.
In addition to the HTML content, the HTML encoding sniffing algorithm takes
into account any character encoding provided in the optional charset
parameter of media types declared in the Content-Type
response header, so
make sure you get the response headers in
addition to the response body if you are following the HTML encoding sniffing
algorithm.
Example
Note
Install and configure code example requirements and the Zyte CA certificate to run the example below.
Use file to find the character encoding of a previously downloaded response based solely on its body (i.e. without following the HTML encoding sniffing algorithm).
# Print the encoding that file reports for output.html.
file --mime-encoding output.html
Use content-type-parser, html-encoding-sniffer and whatwg-encoding:
const contentTypeParser = require('content-type-parser')
const htmlEncodingSniffer = require('html-encoding-sniffer')
const whatwgEncoding = require('whatwg-encoding')
// …
const httpResponseHeaders = response.data.httpResponseHeaders

// Take the charset parameter from the Content-Type response header, if
// present; when several headers match, the last one wins.
let contentTypeCharset
for (const { name, value } of httpResponseHeaders) {
  if (name.toLowerCase() === 'content-type') {
    contentTypeCharset = contentTypeParser(value).get('charset')
  }
}

// httpResponseBody is Base64-encoded; sniff the encoding of the raw bytes,
// passing the header charset as the transport-layer label, then decode.
const httpResponseBody = Buffer.from(response.data.httpResponseBody, 'base64')
const sniffOptions = { transportLayerEncodingLabel: contentTypeCharset }
const encoding = htmlEncodingSniffer(httpResponseBody, sniffOptions)
const html = whatwgEncoding.decode(httpResponseBody, encoding)
web-poet provides a response wrapper that automatically decodes the response body following an encoding sniffing algorithm similar to the one defined in the HTML standard.
Provided that you have extracted a response with both body and headers, and you have Base64-decoded the response body, you can decode the HTML bytes as follows:
from web_poet import HttpResponse
# …
# Turn the header objects (each with "name" and "value" keys) into the
# (name, value) pairs passed to HttpResponse.
headers = tuple(
    (header["name"], header["value"]) for header in http_response_headers
)
response = HttpResponse(
    url="https://example.com",
    body=http_response_body,
    status=200,
    headers=headers,
)
# Reading .text decodes the body bytes into a string.
html = response.text
In transparent mode, regular Scrapy requests targeting HTML resources decode them by default. See Zyte API HTTP requests.
HTML and browser HTML#
HTML found in httpResponseBody is usually different from HTML found in browserHtml (browser HTML):
httpResponseBody does not reflect changes that a webpage makes at run time using JavaScript, such as loading content from additional URLs, or moving or reformatting content within the webpage.
browserHtml includes a normalization of the HTML from the underlying HTTP response, which web browsers perform according to the HTML5 specification. So the content of HTML and browser HTML could be different even when there is no JavaScript involved.
Parsing HTML from httpResponseBody with libraries that do not implement HTML5 parsing, such as lxml.html (used by Scrapy by default), results in a different tree structure.
With an HTML5-compatible parser the resulting tree structure would be the same, provided JavaScript does not cause any other difference.
Because of these differences, switching between these HTML inputs can break your existing parsing code and require changes, such as updating XPath or CSS selectors.