Zyte API shared features#

Learn here about Zyte API features that you can use with HTTP requests, browser requests, and automatic extraction: geolocation, IP type, cookies, sessions, response headers, and metadata.

Geolocation#

The geographical point of origin of a request in terms of IP address can influence the response content. Some websites adjust the language or currency based on the country of origin. Some websites only allow traffic from specific countries.

By default, Zyte API uses the most fitting geolocation based on the target website. You can override the country of origin used for a given request with the geolocation request field.

Note

Zyte API provides 2 sets of geolocations, standard and extended, listed in the reference documentation of geolocation.

Setting geolocation explicitly on a request using an extended geolocation, instead of letting Zyte API choose the right geolocation based on the target website, affects request cost.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// HTTP client that transparently decompresses compressed API responses.
var decompressingHandler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
};
var client = new HttpClient(decompressingHandler);

// Basic authentication: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var credentials = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
client.DefaultRequestHeaders.Add("Authorization", "Basic " + System.Convert.ToBase64String(credentials));
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Request fields: fetch the page with the "AU" geolocation.
var requestFields = new Dictionary<string, object>
{
    ["url"] = "http://ip-api.com/json",
    ["httpResponseBody"] = true,
    ["geolocation"] = "AU"
};
var requestJson = JsonSerializer.Serialize(requestFields);
var requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

var apiResponse = await client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
var apiResponseBytes = await apiResponse.Content.ReadAsByteArrayAsync();

// httpResponseBody is Base64-encoded in the API response, so decode it before parsing.
var apiResponseJson = JsonDocument.Parse(apiResponseBytes);
var encodedBody = apiResponseJson.RootElement.GetProperty("httpResponseBody").ToString();
var decodedBody = System.Convert.FromBase64String(encodedBody);

// The decoded body is the JSON document returned by the target website.
var websiteJson = JsonDocument.Parse(decodedBody);
var countryCode = websiteJson.RootElement.GetProperty("countryCode").ToString();
input.jsonl#
{"url": "http://ip-api.com/json", "httpResponseBody": true, "geolocation": "AU"}
# Run the request from input.jsonl, Base64-decode the response body, and
# print the country code it reports.
zyte-api input.jsonl |
    jq -r .httpResponseBody |
    base64 --decode |
    jq .countryCode
input.json#
{
    "url": "http://ip-api.com/json",
    "httpResponseBody": true,
    "geolocation": "AU"
}
# POST input.json to Zyte API, then Base64-decode the response body and
# print the country code it reports.
curl --compressed \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    https://api.zyte.com/v1/extract |
    jq -r .httpResponseBody |
    base64 --decode |
    jq .countryCode
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/** Sends an HTTP request through Zyte API with the "AU" geolocation and reads the country code. */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the request to Zyte API, Base64-decodes the returned body and parses it as JSON.
   *
   * @param args unused
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url", "http://ip-api.com/json", "httpResponseBody", true, "geolocation", "AU");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // httpResponseBody is Base64-encoded in the API response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String countryCode = data.get("countryCode").getAsString();
      }
    }
  }

  /** Builds the Basic authentication header value: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset; the no-argument getBytes() depends on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Request fields: fetch the page with the "AU" geolocation.
const requestFields = {
  url: 'http://ip-api.com/json',
  httpResponseBody: true,
  geolocation: 'AU'
}
// Basic authentication: API key as user name, no password.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestFields, requestConfig)
  .then(({ data: apiResponse }) => {
    // httpResponseBody arrives Base64-encoded.
    const decodedBody = Buffer.from(apiResponse.httpResponseBody, 'base64')
    const { countryCode } = JSON.parse(decodedBody)
  })
<?php

// Request fields: fetch the page with the "AU" geolocation.
$request_fields = [
    'url' => 'http://ip-api.com/json',
    'httpResponseBody' => true,
    'geolocation' => 'AU',
];

$http_client = new GuzzleHttp\Client();
$api_response = $http_client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $request_fields,
]);

// httpResponseBody is Base64-encoded; decode it before parsing the JSON body.
$api_data = json_decode($api_response->getBody());
$decoded_body = base64_decode($api_data->httpResponseBody);
$website_data = json_decode($decoded_body);
$country_code = $website_data->countryCode;

In proxy mode, use the Zyte-Geolocation header instead of the geolocation request field.

# Proxy mode: select the "AU" geolocation with the Zyte-Geolocation header,
# matching the other examples and the documented output.
curl \
    --proxy api.zyte.com:8011 \
    --proxy-user YOUR_API_KEY: \
    --compressed \
    --header "Zyte-Geolocation: AU" \
    http://ip-api.com/json \
    | jq .countryCode
import json
from base64 import b64decode

import requests

# Request fields: fetch the page with the "AU" geolocation.
request_fields = {
    "url": "http://ip-api.com/json",
    "httpResponseBody": True,
    "geolocation": "AU",
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_fields,
)
# httpResponseBody is Base64-encoded; decode it before parsing the JSON body.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body: bytes = b64decode(encoded_body)
response_data = json.loads(http_response_body)
country_code = response_data["countryCode"]
import asyncio
import json
from base64 import b64decode

from zyte_api import AsyncZyteAPI


async def main():
    """Request ip-api.com with the "AU" geolocation and print the country code."""
    query = {
        "url": "http://ip-api.com/json",
        "httpResponseBody": True,
        "geolocation": "AU",
    }
    zyte_client = AsyncZyteAPI()
    api_response = await zyte_client.get(query)
    # httpResponseBody is Base64-encoded; decode it before parsing the JSON body.
    decoded_body: bytes = b64decode(api_response["httpResponseBody"])
    print(json.loads(decoded_body)["countryCode"])


asyncio.run(main())
import json

from scrapy import Request, Spider


class IPAPIComSpider(Spider):
    """Spider that requests ip-api.com with the "AU" geolocation."""

    name = "ip_api_com"

    def start_requests(self):
        """Yield the single request, passing the geolocation via ``zyte_api_automap``."""
        zyte_api_params = {
            "geolocation": "AU",
        }
        yield Request(
            "http://ip-api.com/json",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        """Parse the JSON response body and read its country code."""
        payload = json.loads(response.body)
        country_code = payload["countryCode"]

Output:

AU

IP type#

IP addresses can be categorized in one of the following types:

  • Data center IP addresses are owned by cloud companies like web hosting providers.

  • Residential IP addresses are assigned by an Internet service provider to a home.

The type of IP address of a request can influence the response content. Some websites return different content depending on the IP type, or only allow requests from residential IP addresses.

By default, Zyte API uses the most fitting IP type based on the target website. You can override the IP type used for a given request by setting the ipType request field to either datacenter or residential.

Warning

Setting ipType explicitly to residential, instead of letting Zyte API choose the right IP type based on the target website, requires completing our KYC procedure and affects request cost.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using System.Xml.XPath;
using HtmlAgilityPack;

// HTTP client that transparently decompresses compressed API responses.
var decompressingHandler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
};
var client = new HttpClient(decompressingHandler);

// Basic authentication: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var credentials = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
client.DefaultRequestHeaders.Add("Authorization", "Basic " + System.Convert.ToBase64String(credentials));
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Send the same request once per IP type and print the text each page shows in its h1 > span.
foreach (var ipType in new[] { "datacenter", "residential" })
{
    var requestFields = new Dictionary<string, object>
    {
        ["url"] = "https://www.whatismyisp.com/",
        ["httpResponseBody"] = true,
        ["ipType"] = ipType
    };
    var requestJson = JsonSerializer.Serialize(requestFields);
    var requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

    var apiResponse = await client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
    var apiResponseBytes = await apiResponse.Content.ReadAsByteArrayAsync();

    // httpResponseBody is Base64-encoded; decode it to get the page HTML.
    var apiResponseJson = JsonDocument.Parse(apiResponseBytes);
    var encodedBody = apiResponseJson.RootElement.GetProperty("httpResponseBody").ToString();
    var html = System.Text.Encoding.UTF8.GetString(System.Convert.FromBase64String(encodedBody));

    var page = new HtmlDocument();
    page.LoadHtml(html);
    var matches = (XPathNodeIterator)page.CreateNavigator().Evaluate("//h1/span/text()");
    // Advance to the first matching text node.
    matches.MoveNext();
    var isp = matches.Current.ToString();

    Console.WriteLine(isp);
}
input.jsonl#
{"url": "https://www.whatismyisp.com/", "httpResponseBody": true, "ipType": "datacenter"}
{"url": "https://www.whatismyisp.com/", "httpResponseBody": true, "ipType": "residential"}
# Run both requests from input.jsonl. xargs hands each response line to its own
# bash invocation as $0, which Base64-decodes the response body and prints the
# text of the page's h1 > span. Error output is discarded.
zyte-api input.jsonl 2> /dev/null \
    | xargs -d\\n -n 1 \
    bash -c "
        jq --raw-output .httpResponseBody <<< \"\$0\" \
        | base64 --decode \
        | xmllint --html --xpath 'string(//h1/span/text())' --noblanks - 2> /dev/null
"
input.jsonl#
{"url": "https://www.whatismyisp.com/", "httpResponseBody": true, "ipType": "datacenter"}
{"url": "https://www.whatismyisp.com/", "httpResponseBody": true, "ipType": "residential"}
# Send each line of input.jsonl as its own request. xargs passes the line to bash
# as $0 and runs up to 2 requests in parallel (-P 2); each invocation
# Base64-decodes the response body and prints the text of the page's h1 > span.
cat input.jsonl \
    | xargs -P 2 -d\\n -n 1 \
    bash -c "
        curl \
                --user YOUR_API_KEY: \
                --header 'Content-Type: application/json' \
                --data \"\$0\" \
                --compressed \
                https://api.zyte.com/v1/extract \
            2> /dev/null \
            | jq --raw-output .httpResponseBody \
            | base64 --decode \
            | xmllint --html --xpath 'string(//h1/span/text())' --noblanks - 2> /dev/null
"
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/** Requests whatismyisp.com through Zyte API once per IP type and prints the h1 > span text. */
class Example {

  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Sends one request per IP type and prints the text of the first span inside the page's h1.
   *
   * @param args unused
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    String[] ipTypes = {"datacenter", "residential"};
    // Create the client once and reuse it for every request.
    try (CloseableHttpClient client = HttpClients.createDefault()) {
      for (String ipType : ipTypes) {
        Map<String, Object> parameters =
            ImmutableMap.of(
                "url", "https://www.whatismyisp.com/", "httpResponseBody", true, "ipType", ipType);
        String requestBody = new Gson().toJson(parameters);

        HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
        request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
        request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
        request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
        request.setEntity(new StringEntity(requestBody));

        try (CloseableHttpResponse response = client.execute(request)) {
          HttpEntity entity = response.getEntity();
          String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
          JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
          // httpResponseBody is Base64-encoded; decode it to get the page HTML.
          String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
          byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
          String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
          Document document = Jsoup.parse(httpResponseBody);
          String isp = document.select("h1 > span:first-of-type").text();
          System.out.println(isp);
        }
      }
    }
  }

  /** Builds the Basic authentication header value: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset; the no-argument getBytes() depends on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
const cheerio = require('cheerio')

// Basic authentication: API key as user name, no password.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

// Send the same request once per IP type and print the text of the page's h1 > span.
;['datacenter', 'residential'].forEach((ipType) => {
  const requestFields = {
    url: 'https://www.whatismyisp.com/',
    httpResponseBody: true,
    ipType
  }
  axios
    .post('https://api.zyte.com/v1/extract', requestFields, requestConfig)
    .then(({ data: apiResponse }) => {
      // httpResponseBody arrives Base64-encoded.
      const html = Buffer.from(apiResponse.httpResponseBody, 'base64')
      const $ = cheerio.load(html)
      const isp = $('h1 > span:first-of-type').text()
      console.log(isp)
    })
})
<?php

// Report only fatal and parse errors, so lower-severity messages do not clutter the output.
error_reporting(E_ERROR | E_PARSE);
$client = new GuzzleHttp\Client();
$ip_types = ['datacenter', 'residential'];
// Iterate by value: the loop never modifies the array, and a by-reference loop
// variable would stay bound to the last element after the loop ends.
foreach ($ip_types as $ip_type) {
    $response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
        'auth' => ['YOUR_API_KEY', ''],
        'headers' => ['Accept-Encoding' => 'gzip'],
        'json' => [
            'url' => 'https://www.whatismyisp.com/',
            'httpResponseBody' => true,
            'ipType' => $ip_type,
        ],
    ]);
    $data = json_decode($response->getBody());
    // httpResponseBody is Base64-encoded; decode it to get the page HTML.
    $http_response_body = base64_decode($data->httpResponseBody);
    $doc = new DOMDocument();
    $doc->loadHTML($http_response_body);
    $xpath = new DOMXPath($doc);
    // Text of the first span inside the page's h1.
    $isp = $xpath->query('//h1/span/text()')->item(0)->nodeValue;
    echo $isp."\n";
}

In proxy mode, use the Zyte-IPType header instead of the ipType request field.

# Proxy mode: repeat the request once per IP type, selected with the
# Zyte-IPType header, and print the text of the page's h1 > span.
for ip_type in datacenter residential; do
    curl --compressed \
        --proxy api.zyte.com:8011 \
        --proxy-user YOUR_API_KEY: \
        --header "Zyte-IPType: $ip_type" \
        https://www.whatismyisp.com/ \
        2> /dev/null |
        xmllint --html --xpath 'string(//h1/span/text())' --noblanks - 2> /dev/null
done
from base64 import b64decode

import requests
from parsel import Selector

# Send the same request once per IP type and print the text of the page's h1 > span.
for ip_type in ("datacenter", "residential"):
    request_fields = {
        "url": "https://www.whatismyisp.com/",
        "httpResponseBody": True,
        "ipType": ip_type,
    }
    api_response = requests.post(
        "https://api.zyte.com/v1/extract",
        auth=("YOUR_API_KEY", ""),
        json=request_fields,
    )
    # httpResponseBody is Base64-encoded; decode it to get the page HTML.
    html = b64decode(api_response.json()["httpResponseBody"]).decode()
    isp = Selector(html).css("h1 > span::text").get()
    print(isp)
import asyncio
from base64 import b64decode

from parsel import Selector
from zyte_api import AsyncZyteAPI


async def main():
    """Request whatismyisp.com once per IP type and print the h1 > span text."""
    zyte_client = AsyncZyteAPI()
    for ip_type in ("datacenter", "residential"):
        query = {
            "url": "https://www.whatismyisp.com/",
            "httpResponseBody": True,
            "ipType": ip_type,
        }
        api_response = await zyte_client.get(query)
        # httpResponseBody is Base64-encoded; decode it to get the page HTML.
        html = b64decode(api_response["httpResponseBody"]).decode()
        isp = Selector(html).css("h1 > span::text").get()
        print(isp)


asyncio.run(main())
from scrapy import Request, Spider


class WhatIsMyIspComSpider(Spider):
    """Spider that requests whatismyisp.com once per IP type."""

    name = "whatismyisp_com"

    def start_requests(self):
        """Yield one request per IP type, passing it via ``zyte_api_automap``."""
        for ip_type in ("datacenter", "residential"):
            zyte_api_params = {
                "ipType": ip_type,
            }
            yield Request(
                "https://www.whatismyisp.com/",
                meta={"zyte_api_automap": zyte_api_params},
            )

    def parse(self, response):
        """Print the text of the first ``h1 > span`` match in the response."""
        isp = response.css("h1 > span::text").get()
        print(isp)

Output:

[A web hosting company]
[An Internet service provider]

Cookies#

Some websites use cookies to track sessions and user preferences like language, address, etc.

Use the requestCookies and responseCookies request fields to set and get cookies. See Example 1 below.

A common usage pattern with cookies is to send a browser request with the responseCookies request field set to true to a webpage that requires a browser to generate a valid session cookie, and then copy the responseCookies response field value into the requestCookies request field of follow-up HTTP requests. This allows using sessions on websites as long as the target website only checks for the cookie presence, which is often the case (if not, use sessions). See Example 2 below.

Example 1: Set a cookie and get it back

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

The following code example sends a cookie to httpbin.org and prints the cookies that httpbin.org reports to have received:

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// HTTP client that transparently decompresses compressed API responses.
var decompressingHandler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
};
var client = new HttpClient(decompressingHandler);

// Basic authentication: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var credentials = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
client.DefaultRequestHeaders.Add("Authorization", "Basic " + System.Convert.ToBase64String(credentials));
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// The cookie to send along with the request to httpbin.org.
var fooCookie = new Dictionary<string, string>
{
    ["name"] = "foo",
    ["value"] = "bar",
    ["domain"] = "httpbin.org"
};
var requestFields = new Dictionary<string, object>
{
    ["url"] = "https://httpbin.org/cookies",
    ["httpResponseBody"] = true,
    ["requestCookies"] = new List<Dictionary<string, string>> { fooCookie }
};
var requestJson = JsonSerializer.Serialize(requestFields);
var requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

var apiResponse = await client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
var apiResponseBytes = await apiResponse.Content.ReadAsByteArrayAsync();

// httpResponseBody is Base64-encoded; decode it and print it as UTF-8 text.
var apiResponseJson = JsonDocument.Parse(apiResponseBytes);
var encodedBody = apiResponseJson.RootElement.GetProperty("httpResponseBody").ToString();
var decodedBody = System.Convert.FromBase64String(encodedBody);
var result = System.Text.Encoding.UTF8.GetString(decodedBody);

Console.WriteLine(result);
input.jsonl#
{"url": "https://httpbin.org/cookies", "httpResponseBody": true, "requestCookies": [{"name": "foo", "value": "bar", "domain": "httpbin.org"}]}
# Run the request from input.jsonl and print the Base64-decoded response body.
zyte-api input.jsonl |
    jq -r .httpResponseBody |
    base64 --decode
input.json#
{
    "url": "https://httpbin.org/cookies",
    "httpResponseBody": true,
    "requestCookies": [
        {
            "name": "foo",
            "value": "bar",
            "domain": "httpbin.org"
        }
    ]
}
# POST input.json to Zyte API and print the Base64-decoded response body.
curl --compressed \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    https://api.zyte.com/v1/extract |
    jq -r .httpResponseBody |
    base64 --decode
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/** Sends a cookie to httpbin.org through Zyte API and prints the decoded response body. */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts a request carrying one request cookie and prints the Base64-decoded response body.
   *
   * @param args unused
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, String> cookies =
        ImmutableMap.of("name", "foo", "value", "bar", "domain", "httpbin.org");
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/cookies",
            "httpResponseBody",
            true,
            "requestCookies",
            Collections.singletonList(cookies));
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // httpResponseBody is Base64-encoded in the API response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        System.out.println(httpResponseBody);
      }
    }
  }

  /** Builds the Basic authentication header value: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset; the no-argument getBytes() depends on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// The cookie to send along with the request to httpbin.org.
const fooCookie = {
  name: 'foo',
  value: 'bar',
  domain: 'httpbin.org'
}
const requestFields = {
  url: 'https://httpbin.org/cookies',
  httpResponseBody: true,
  requestCookies: [fooCookie]
}
// Basic authentication: API key as user name, no password.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestFields, requestConfig)
  .then(({ data: apiResponse }) => {
    // httpResponseBody arrives Base64-encoded.
    const decodedBody = Buffer.from(apiResponse.httpResponseBody, 'base64')
    console.log(decodedBody.toString())
  })
<?php

// The cookie to send along with the request to httpbin.org.
$foo_cookie = [
    'name' => 'foo',
    'value' => 'bar',
    'domain' => 'httpbin.org',
];
$request_fields = [
    'url' => 'https://httpbin.org/cookies',
    'httpResponseBody' => true,
    'requestCookies' => [$foo_cookie],
];

$http_client = new GuzzleHttp\Client();
$api_response = $http_client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $request_fields,
]);

// httpResponseBody is Base64-encoded; decode it before printing.
$api_data = json_decode($api_response->getBody());
echo base64_decode($api_data->httpResponseBody);

With the proxy mode, the request Cookie header from your requests is used automatically to set cookies for the target URL domain.

# Proxy mode: send the cookie through the regular Cookie request header.
curl --compressed \
    --proxy api.zyte.com:8011 \
    --proxy-user YOUR_API_KEY: \
    --header "Cookie: foo=bar" \
    https://httpbin.org/cookies

In proxy mode, setting cookies for domains other than the target URL domain is not supported.

from base64 import b64decode

import requests

# The cookie to send along with the request to httpbin.org.
foo_cookie = {
    "name": "foo",
    "value": "bar",
    "domain": "httpbin.org",
}
request_fields = {
    "url": "https://httpbin.org/cookies",
    "httpResponseBody": True,
    "requestCookies": [foo_cookie],
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_fields,
)
# httpResponseBody is Base64-encoded; decode it before printing.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
print(http_response_body.decode())
import asyncio
from base64 import b64decode

from zyte_api import AsyncZyteAPI


async def main():
    """Send a cookie to httpbin.org and print the decoded response body."""
    foo_cookie = {
        "name": "foo",
        "value": "bar",
        "domain": "httpbin.org",
    }
    query = {
        "url": "https://httpbin.org/cookies",
        "httpResponseBody": True,
        "requestCookies": [foo_cookie],
    }
    zyte_client = AsyncZyteAPI()
    api_response = await zyte_client.get(query)
    # httpResponseBody is Base64-encoded; decode it before printing.
    decoded_body = b64decode(api_response["httpResponseBody"])
    print(decoded_body.decode())


asyncio.run(main())
from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Spider that sends a cookie to httpbin.org and prints the response text."""

    name = "httpbin_org"

    def start_requests(self):
        """Yield the single request, passing the cookie via ``zyte_api_automap``."""
        foo_cookie = {
            "name": "foo",
            "value": "bar",
            "domain": "httpbin.org",
        }
        zyte_api_params = {
            "requestCookies": [foo_cookie],
        }
        yield Request(
            "https://httpbin.org/cookies",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        """Print the response body as text."""
        print(response.text)

Output:

{
  "cookies": {
    "foo": "bar"
  }
}
Example 2: Reuse browser cookies in HTTP requests

Send a browser request to the home page of a website, and use its response cookies as request cookies in an HTTP request to a different URL of that website.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// HTTP client that transparently decompresses compressed API responses.
HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

// Basic authentication: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Step 1: browser request to the home page, with responseCookies enabled so
// that the cookies are included in the API response.
var browserInput = new Dictionary<string, object>(){
    {"url", "https://toscrape.com/"},
    {"browserHtml", true},
    {"responseCookies", true}
};
var browserInputJson = JsonSerializer.Serialize(browserInput);
var browserContent = new StringContent(browserInputJson, Encoding.UTF8, "application/json");
HttpResponseMessage browserResponse = await client.PostAsync("https://api.zyte.com/v1/extract", browserContent);
var browserResponseBody = await browserResponse.Content.ReadAsByteArrayAsync();
var browserData = JsonDocument.Parse(browserResponseBody);

// Step 2: HTTP request to a different URL of the same website, passing the
// cookies from step 1 as requestCookies.
var httpInput = new Dictionary<string, object>(){
    {"url", "https://books.toscrape.com/"},
    {"httpResponseBody", true},
    {"requestCookies", browserData.RootElement.GetProperty("responseCookies")}
};
var httpInputJson = JsonSerializer.Serialize(httpInput);
var httpContent = new StringContent(httpInputJson, Encoding.UTF8, "application/json");
HttpResponseMessage httpResponse = await client.PostAsync("https://api.zyte.com/v1/extract", httpContent);
var httpResponseBody = await httpResponse.Content.ReadAsByteArrayAsync();
var httpData = JsonDocument.Parse(httpResponseBody);
// httpResponseBody is Base64-encoded; decode it and print it as UTF-8 text.
var base64HttpResponseBodyField = httpData.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBodyField = System.Convert.FromBase64String(base64HttpResponseBodyField);
var result = System.Text.Encoding.UTF8.GetString(httpResponseBodyField);

Console.WriteLine(result);
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends a browser request that returns the cookies set by the page, then replays those cookies in
 * a plain HTTP request and prints the decoded body of the second response.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";
  private static final String API_URL = "https://api.zyte.com/v1/extract";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Gson gson = new Gson();

    Map<String, Object> browserParameters =
        ImmutableMap.of(
            "url", "https://toscrape.com/", "browserHtml", true, "responseCookies", true);
    HttpPost browserRequest = buildRequest(gson.toJson(browserParameters));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse browserResponse = client.execute(browserRequest)) {
        HttpEntity browserEntity = browserResponse.getEntity();
        String browserApiResponse = EntityUtils.toString(browserEntity, StandardCharsets.UTF_8);
        JsonObject browserJsonObject = JsonParser.parseString(browserApiResponse).getAsJsonObject();

        // Forward the cookies to the follow-up request exactly as the API returned them.
        Map<String, Object> httpParameters =
            ImmutableMap.of(
                "url",
                "https://books.toscrape.com/",
                "httpResponseBody",
                true,
                "requestCookies",
                browserJsonObject.get("responseCookies"));
        HttpPost httpRequest = buildRequest(gson.toJson(httpParameters));

        try (CloseableHttpResponse httpResponse = client.execute(httpRequest)) {
          HttpEntity httpEntity = httpResponse.getEntity();
          String httpApiResponse = EntityUtils.toString(httpEntity, StandardCharsets.UTF_8);
          JsonObject httpJsonObject = JsonParser.parseString(httpApiResponse).getAsJsonObject();
          // httpResponseBody arrives Base64-encoded; decode it before printing.
          String base64HttpResponseBody = httpJsonObject.get("httpResponseBody").getAsString();
          byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
          String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
          System.out.println(httpResponseBody);
        }
      }
    }
  }

  /** Builds an authenticated POST to the extract endpoint that carries {@code jsonBody}. */
  private static HttpPost buildRequest(final String jsonBody) {
    HttpPost request = new HttpPost(API_URL);
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly so the body bytes are UTF-8, matching the header above,
    // instead of relying on the single-argument constructor's default text encoding.
    request.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON));
    return request;
  }

  /** Returns the Basic auth header value: the API key as user name with an empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Endpoint and credentials shared by both API calls.
const apiUrl = 'https://api.zyte.com/v1/extract'
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

// First a browser request that also returns the cookies set by the page,
// then an HTTP request that sends those cookies back.
axios
  .post(
    apiUrl,
    {
      url: 'https://toscrape.com/',
      browserHtml: true,
      responseCookies: true
    },
    requestConfig
  )
  .then((browserResponse) =>
    axios.post(
      apiUrl,
      {
        url: 'https://books.toscrape.com/',
        httpResponseBody: true,
        requestCookies: browserResponse.data.responseCookies
      },
      requestConfig
    )
  )
  .then((httpResponse) => {
    // httpResponseBody is Base64-encoded; decode it before printing.
    const decodedBody = Buffer.from(httpResponse.data.httpResponseBody, 'base64')
    console.log(decodedBody.toString())
  })
<?php

// Endpoint and options shared by both API calls.
$endpoint = 'https://api.zyte.com/v1/extract';
$common_options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
];
$client = new GuzzleHttp\Client();

// Browser request: render the page and ask for the cookies it set.
$browser_query = [
    'url' => 'https://toscrape.com/',
    'browserHtml' => true,
    'responseCookies' => true,
];
$browser_response = $client->request('POST', $endpoint, $common_options + ['json' => $browser_query]);
$browser_data = json_decode($browser_response->getBody());

// HTTP request: send the cookies from the browser response back.
$http_query = [
    'url' => 'https://books.toscrape.com/',
    'httpResponseBody' => true,
    'requestCookies' => $browser_data->responseCookies,
];
$http_response = $client->request('POST', $endpoint, $common_options + ['json' => $http_query]);
$http_data = json_decode($http_response->getBody());

// httpResponseBody is Base64-encoded; decode it before printing.
echo base64_decode($http_data->httpResponseBody);
from base64 import b64decode

import requests

# Endpoint and credentials shared by both API calls.
API_URL = "https://api.zyte.com/v1/extract"
API_AUTH = ("YOUR_API_KEY", "")

# Browser request: render the page and ask for the cookies it set.
browser_query = {
    "url": "https://toscrape.com/",
    "browserHtml": True,
    "responseCookies": True,
}
browser_data = requests.post(API_URL, auth=API_AUTH, json=browser_query).json()

# HTTP request: send the cookies from the browser response back.
http_query = {
    "url": "https://books.toscrape.com/",
    "httpResponseBody": True,
    "requestCookies": browser_data["responseCookies"],
}
http_data = requests.post(API_URL, auth=API_AUTH, json=http_query).json()

# httpResponseBody is Base64-encoded; decode it before printing.
print(b64decode(http_data["httpResponseBody"]).decode())
import asyncio
from base64 import b64decode

from zyte_api import AsyncZyteAPI


async def main():
    """Fetch cookies with a browser request and replay them in an HTTP request."""
    client = AsyncZyteAPI()

    # Browser request: render the page and ask for the cookies it set.
    browser_query = {
        "url": "https://toscrape.com/",
        "browserHtml": True,
        "responseCookies": True,
    }
    browser_response = await client.get(browser_query)

    # HTTP request: send the cookies from the browser response back.
    http_query = {
        "url": "https://books.toscrape.com/",
        "httpResponseBody": True,
        "requestCookies": browser_response["responseCookies"],
    }
    http_response = await client.get(http_query)

    # httpResponseBody is Base64-encoded; decode it before printing.
    encoded_body = http_response["httpResponseBody"]
    print(b64decode(encoded_body).decode())


asyncio.run(main())
from scrapy import Request, Spider


class ToScrapeComSpider(Spider):
    """Get cookies from a browser request and replay them in a follow-up request."""

    name = "toscrape_com"

    def start_requests(self):
        # Browser request that also asks for the cookies set by the page.
        browser_params = {"browserHtml": True, "responseCookies": True}
        yield Request(
            "https://toscrape.com/",
            meta={"zyte_api_automap": browser_params},
            callback=self.parse_browser,
        )

    def parse_browser(self, response):
        # Send the cookies from the raw API response back in the next request.
        cookies = response.raw_api_response["responseCookies"]
        yield response.follow(
            "https://books.toscrape.com/",
            meta={"zyte_api_automap": {"requestCookies": cookies}},
            callback=self.parse_http,
        )

    def parse_http(self, response):
        print(response.text)

Sessions#

In web scraping, a session is a set of request conditions (IP address, cookie jar, network stack, etc.) that, when shared by two or more requests, make those requests seem part of an organic web browsing session.

For some websites, reusing cookies can be enough to maintain a session. But on other websites, sessions get invalidated when their requests do not share the same IP address, network stack, etc.

Zyte API supports 2 different ways to define request sessions: client-managed sessions and server-managed sessions.

Tip

scrapy-zyte-api also implements an alternative session management API, similar to that of server-managed sessions, but built on top of client-managed sessions.

Zyte API sessions can be especially useful for:

  • Crawling stateful parts of websites, like multi-page forms, pagination or scrolling, where the time limit of actions can be a problem.

    Note

    Sessions do not maintain browser state, they only make it seem so to target websites. In other words, when you send a 2nd request with the same session, your request does not use the same browser instance as the 1st request.

    Maintaining browser state between requests is a planned feature.

  • Optimizing scenarios where you need to set initial session conditions (language, country, currency, address, etc.) shared by many follow-up requests.

    For example:

    • If you have multiple browser requests that all share a set of initial actions for basic session setup, such as using the setLocation action or similar, sessions can get you faster responses and give you extra run time for other actions.

    • If you have multiple HTTP requests that need cookies from an earlier browser request, and you need those follow-up requests to be sent with the same session as the browser request, sessions can give you that.

Client-managed sessions#

To create a client-managed session, when sending a request, set session.id to a version 4 UUID.

When sending follow-up requests with the same session ID, the created session will be reused, i.e. all requests will share the same IP address, network stack, cookie jar, etc.

Compared to server-managed sessions, client-managed sessions offer a lower-level API that lets you do more but also requires you to do more. For example:

  • You control the number of sessions being used. You decide how many sessions you want to use at a given time, you create those sessions, you rotate your pool of sessions among your requests, and you create new sessions as old sessions expire.

  • You can stop using a specific session, e.g. if you can tell from a response that the target website invalidated the session.

See session for details.

Example 1: Same-session requests use the same IP address

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Sends two requests that share one client-managed session and prints the IP
// address that httpbin.org reports for each of them.
HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

// Basic auth: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// The same session ID is sent with both requests so that they share a session.
var sessionId = Guid.NewGuid().ToString();

for (int i = 0; i < 2; i++)
{
    var input = new Dictionary<string, object>(){
        {"url", "https://httpbin.org/ip"},
        {"httpResponseBody", true},
        {
            "session",
            new Dictionary<string, string>()
            {
                {"id", sessionId}
            }
        }
    };
    var inputJson = JsonSerializer.Serialize(input);

    // Request content, response and parsed documents are disposable; release
    // them at the end of each iteration instead of leaking them.
    using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");
    using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
    var body = await response.Content.ReadAsByteArrayAsync();

    // httpResponseBody is Base64-encoded; decode it to get the JSON returned by httpbin.org.
    using var data = JsonDocument.Parse(body);
    var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
    var httpResponseBodyBytes = System.Convert.FromBase64String(base64HttpResponseBody);
    var httpResponseBody = System.Text.Encoding.UTF8.GetString(httpResponseBodyBytes);

    using var responseData = JsonDocument.Parse(httpResponseBody);
    var ipAddress = responseData.RootElement.GetProperty("origin").ToString();

    Console.WriteLine(ipAddress);
}
input.jsonl#
{"url": "https://httpbin.org/ip", "httpResponseBody": true, "session": {"id": "e07843b4-fd72-4a02-82b4-3376c6ceba92"}}
{"url": "https://httpbin.org/ip", "httpResponseBody": true, "session": {"id": "e07843b4-fd72-4a02-82b4-3376c6ceba92"}}
# Run every query in input.jsonl, decode each Base64 response body and print
# the "origin" value it contains.
zyte-api input.jsonl |
    jq --raw-output .httpResponseBody |
    base64 --decode |
    jq --raw-output .origin
input.json#
{
    "url": "https://httpbin.org/ip",
    "httpResponseBody": true,
    "session": {
        "id": "e07843b4-fd72-4a02-82b4-3376c6ceba92"
    }
}
# Send the request from input.json twice; both runs carry the same session ID.
for i in 1 2; do
    curl \
        --compressed \
        --user YOUR_API_KEY: \
        --header 'Content-Type: application/json' \
        --data @input.json \
        https://api.zyte.com/v1/extract |
        jq --raw-output .httpResponseBody |
        base64 --decode |
        jq --raw-output .origin
done
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import java.util.UUID;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends two requests that share one client-managed session and prints the IP address that
 * httpbin.org reports for each of them.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    // The same session ID is sent with both requests so that they share a session.
    String sessionId = UUID.randomUUID().toString();
    Map<String, Object> session = ImmutableMap.of("id", sessionId);
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url", "https://httpbin.org/ip", "httpResponseBody", true, "session", session);
    String requestBody = new Gson().toJson(parameters);

    // One HTTP client serves both requests instead of being rebuilt on every iteration.
    try (CloseableHttpClient client = HttpClients.createDefault()) {
      for (int i = 0; i < 2; i++) {
        HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
        request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
        request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
        request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
        // Pass the content type explicitly so the body bytes are UTF-8, matching the header
        // above, instead of relying on the single-argument constructor's default text encoding.
        request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

        try (CloseableHttpResponse response = client.execute(request)) {
          HttpEntity entity = response.getEntity();
          String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
          JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
          // httpResponseBody is Base64-encoded; decode it to get the JSON from httpbin.org.
          String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
          byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
          String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
          JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
          String body = data.get("origin").getAsString();
          System.out.println(body);
        }
      }
    }
  }

  /** Returns the Basic auth header value: the API key as user name with an empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
const crypto = require('crypto')

// The same session ID is sent with both requests so that they share a session.
const sessionId = String(crypto.randomUUID())

// Sends one request within the shared session and logs the IP address
// reported in the decoded response body.
function logOrigin () {
  return axios
    .post(
      'https://api.zyte.com/v1/extract',
      {
        url: 'https://httpbin.org/ip',
        httpResponseBody: true,
        session: { id: sessionId }
      },
      {
        auth: { username: 'YOUR_API_KEY' }
      }
    )
    .then((response) => {
      const decodedBody = Buffer.from(response.data.httpResponseBody, 'base64')
      const origin = JSON.parse(decodedBody).origin
      console.log(origin)
    })
}

// The second request starts only after the first response has been logged.
logOrigin().then(() => logOrigin())
<?php

// https://stackoverflow.com/a/15875555
// Returns a random version 4 UUID as a lowercase, hyphen-separated string.
function uuidv4()
{
    $bytes = random_bytes(16);

    // Overwrite the high nibble of byte 6 with the version number (0100).
    $bytes[6] = chr((ord($bytes[6]) & 0x0F) | 0x40);
    // Overwrite the two highest bits of byte 8 with the variant bits (10).
    $bytes[8] = chr((ord($bytes[8]) & 0x3F) | 0x80);

    $hex = bin2hex($bytes);

    return sprintf(
        '%s-%s-%s-%s-%s',
        substr($hex, 0, 8),
        substr($hex, 8, 4),
        substr($hex, 12, 4),
        substr($hex, 16, 4),
        substr($hex, 20, 12)
    );
}

// Sends two requests that share one client-managed session and prints the IP
// address reported for each of them.
$client = new GuzzleHttp\Client();
// The same session ID is sent with both requests so that they share a session.
$session_id = uuidv4();

for ($i = 0; $i < 2; ++$i) {
    $response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
        'auth' => ['YOUR_API_KEY', ''],
        'headers' => ['Accept-Encoding' => 'gzip'],
        'json' => [
            // Same /ip endpoint as the other variants of this example.
            'url' => 'https://httpbin.org/ip',
            'httpResponseBody' => true,
            'session' => ['id' => $session_id],
        ],
    ]);
    $data = json_decode($response->getBody());
    // httpResponseBody is Base64-encoded; decode it to get the JSON from httpbin.org.
    $http_response_body = base64_decode($data->httpResponseBody);
    $body = json_decode($http_response_body)->origin;
    echo $body.PHP_EOL;
}

With the proxy mode, use the Zyte-Session-ID header.

# Send the same request twice through the proxy endpoint; the Zyte-Session-ID
# header carries the session ID for both runs.
for i in 1 2; do
    curl \
        --compressed \
        --proxy api.zyte.com:8011 \
        --proxy-user YOUR_API_KEY: \
        --header 'Content-Type: application/json' \
        --header 'Zyte-Session-ID: e07843b4-fd72-4a02-82b4-3376c6ceba92' \
        https://httpbin.org/ip |
        jq --raw-output .origin
done
import json
from base64 import b64decode
from uuid import uuid4

import requests

# The same session ID is sent with both requests so that they share a session.
session_id = str(uuid4())
query = {
    "url": "https://httpbin.org/ip",
    "httpResponseBody": True,
    "session": {"id": session_id},
}

for _ in range(2):
    api_response = requests.post(
        "https://api.zyte.com/v1/extract",
        auth=("YOUR_API_KEY", ""),
        json=query,
    )
    # httpResponseBody is Base64-encoded; decode it to get the JSON from httpbin.org.
    encoded_body = api_response.json()["httpResponseBody"]
    origin = json.loads(b64decode(encoded_body))["origin"]
    print(origin)
import asyncio
import json
from base64 import b64decode
from uuid import uuid4

from zyte_api import AsyncZyteAPI


async def main():
    """Send two same-session requests and print the IP address of each."""
    client = AsyncZyteAPI()
    # The same session ID is sent with both requests so that they share a session.
    session_id = str(uuid4())
    for _ in range(2):
        query = {
            "url": "https://httpbin.org/ip",
            "httpResponseBody": True,
            "session": {"id": session_id},
        }
        api_response = await client.get(query)
        # httpResponseBody is Base64-encoded; decode it to get the JSON from httpbin.org.
        decoded_body = b64decode(api_response["httpResponseBody"]).decode()
        origin = json.loads(decoded_body)["origin"]
        print(origin)


asyncio.run(main())

Tip

scrapy-zyte-api also provides its own session management API, similar to that of server-managed sessions, but built on top of client-managed sessions.

import json
from uuid import uuid4

from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Send two same-session requests and print the IP address of each."""

    name = "httpbin_org"

    def start_requests(self):
        # The session ID is handed to the callback so the follow-up request can reuse it.
        session_id = str(uuid4())
        session_meta = {"zyte_api_automap": {"session": {"id": session_id}}}
        yield Request(
            "https://httpbin.org/ip",
            meta=session_meta,
            cb_kwargs={"session_id": session_id},
        )

    def parse(self, response, session_id):
        origin = json.loads(response.body)["origin"]
        print(origin)
        # dont_filter lets this second request to the same URL through.
        session_meta = {"zyte_api_automap": {"session": {"id": session_id}}}
        yield Request(
            "https://httpbin.org/ip",
            callback=self.parse2,
            meta=session_meta,
            dont_filter=True,
        )

    def parse2(self, response):
        origin = json.loads(response.body)["origin"]
        print(origin)

Output:

203.0.113.122
203.0.113.122
Example 2: Reuse browser cookies in HTTP requests

Start a session with a browser request to the home page of a website, and reuse that session for an HTTP request to a different URL of that website.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Starts a session with a browser request to the home page of a website, then
// reuses that session for an HTTP request to a different URL of that website
// and prints the decoded body of the HTTP response.
HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

// Basic auth: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// The same session ID is sent with both requests so that they share a session.
var sessionId = Guid.NewGuid().ToString();

var browserInput = new Dictionary<string, object>(){
    {"url", "https://toscrape.com/"},
    {"browserHtml", true},
    {
        "session",
        new Dictionary<string, string>()
        {
            {"id", sessionId}
        }
    }
};
var browserInputJson = JsonSerializer.Serialize(browserInput);
using var browserContent = new StringContent(browserInputJson, Encoding.UTF8, "application/json");
// The browser response is only needed to start the session; its body is not read.
using HttpResponseMessage browserResponse = await client.PostAsync("https://api.zyte.com/v1/extract", browserContent);

var httpInput = new Dictionary<string, object>(){
    // A different URL of the same website, as in the other variants of this example.
    {"url", "https://books.toscrape.com/"},
    {"httpResponseBody", true},
    {
        "session",
        new Dictionary<string, string>()
        {
            {"id", sessionId}
        }
    }
};
var httpInputJson = JsonSerializer.Serialize(httpInput);
using var httpContent = new StringContent(httpInputJson, Encoding.UTF8, "application/json");
using HttpResponseMessage httpResponse = await client.PostAsync("https://api.zyte.com/v1/extract", httpContent);
var httpResponseBody = await httpResponse.Content.ReadAsByteArrayAsync();

// httpResponseBody is Base64-encoded; decode it before printing.
using var httpData = JsonDocument.Parse(httpResponseBody);
var base64HttpResponseBodyField = httpData.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBodyField = System.Convert.FromBase64String(base64HttpResponseBodyField);
var result = System.Text.Encoding.UTF8.GetString(httpResponseBodyField);

Console.WriteLine(result);
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import java.util.UUID;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Starts a session with a browser request, then reuses that session for an HTTP request to a
 * different URL of the same website and prints the decoded body of the HTTP response.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";
  private static final String API_URL = "https://api.zyte.com/v1/extract";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Gson gson = new Gson();
    // The same session ID is sent with both requests so that they share a session.
    String sessionId = UUID.randomUUID().toString();
    Map<String, Object> session = ImmutableMap.of("id", sessionId);

    Map<String, Object> browserParameters =
        ImmutableMap.of("url", "https://toscrape.com/", "browserHtml", true, "session", session);
    HttpPost browserRequest = buildRequest(gson.toJson(browserParameters));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      // The browser response is only needed to start the session; its body is not read.
      try (CloseableHttpResponse browserResponse = client.execute(browserRequest)) {
        Map<String, Object> httpParameters =
            ImmutableMap.of(
                "url", "https://books.toscrape.com/", "httpResponseBody", true, "session", session);
        HttpPost httpRequest = buildRequest(gson.toJson(httpParameters));

        try (CloseableHttpResponse httpResponse = client.execute(httpRequest)) {
          HttpEntity httpEntity = httpResponse.getEntity();
          String httpApiResponse = EntityUtils.toString(httpEntity, StandardCharsets.UTF_8);
          JsonObject httpJsonObject = JsonParser.parseString(httpApiResponse).getAsJsonObject();
          // httpResponseBody is Base64-encoded; decode it before printing.
          String base64HttpResponseBody = httpJsonObject.get("httpResponseBody").getAsString();
          byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
          String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
          System.out.println(httpResponseBody);
        }
      }
    }
  }

  /** Builds an authenticated POST to the extract endpoint that carries {@code jsonBody}. */
  private static HttpPost buildRequest(final String jsonBody) {
    HttpPost request = new HttpPost(API_URL);
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly so the body bytes are UTF-8, matching the header above,
    // instead of relying on the single-argument constructor's default text encoding.
    request.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON));
    return request;
  }

  /** Returns the Basic auth header value: the API key as user name with an empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
const crypto = require('crypto')

// The same session ID is sent with both requests so that they share a session.
const sessionId = String(crypto.randomUUID())

// Endpoint and credentials shared by both API calls.
const apiUrl = 'https://api.zyte.com/v1/extract'
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

// First a browser request that starts the session, then an HTTP request to a
// different URL of the same website that reuses it.
axios
  .post(
    apiUrl,
    {
      url: 'https://toscrape.com/',
      browserHtml: true,
      session: { id: sessionId }
    },
    requestConfig
  )
  .then((browserResponse) =>
    axios.post(
      apiUrl,
      {
        url: 'https://books.toscrape.com/',
        httpResponseBody: true,
        session: { id: sessionId }
      },
      requestConfig
    )
  )
  .then((httpResponse) => {
    // httpResponseBody is Base64-encoded; decode it before printing.
    const decodedBody = Buffer.from(httpResponse.data.httpResponseBody, 'base64')
    console.log(decodedBody.toString())
  })
<?php

// https://stackoverflow.com/a/15875555
// Returns a random version 4 UUID as a lowercase, hyphen-separated string.
function uuidv4()
{
    $bytes = random_bytes(16);

    // Overwrite the high nibble of byte 6 with the version number (0100).
    $bytes[6] = chr((ord($bytes[6]) & 0x0F) | 0x40);
    // Overwrite the two highest bits of byte 8 with the variant bits (10).
    $bytes[8] = chr((ord($bytes[8]) & 0x3F) | 0x80);

    $hex = bin2hex($bytes);

    return sprintf(
        '%s-%s-%s-%s-%s',
        substr($hex, 0, 8),
        substr($hex, 8, 4),
        substr($hex, 12, 4),
        substr($hex, 16, 4),
        substr($hex, 20, 12)
    );
}

// Endpoint and options shared by both API calls.
$endpoint = 'https://api.zyte.com/v1/extract';
$common_options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
];
$client = new GuzzleHttp\Client();
// The same session ID is sent with both requests so that they share a session.
$session_id = uuidv4();

// Browser request: starts the session.
$browser_query = [
    'url' => 'https://toscrape.com/',
    'browserHtml' => true,
    'session' => ['id' => $session_id],
];
$browser_response = $client->request('POST', $endpoint, $common_options + ['json' => $browser_query]);

// HTTP request: reuses the session for a different URL of the same website.
$http_query = [
    'url' => 'https://books.toscrape.com/',
    'httpResponseBody' => true,
    'session' => ['id' => $session_id],
];
$http_response = $client->request('POST', $endpoint, $common_options + ['json' => $http_query]);
$http_data = json_decode($http_response->getBody());

// httpResponseBody is Base64-encoded; decode it before printing.
echo base64_decode($http_data->httpResponseBody);
from base64 import b64decode
from uuid import uuid4

import requests

# Endpoint and credentials shared by both API calls.
API_URL = "https://api.zyte.com/v1/extract"
API_AUTH = ("YOUR_API_KEY", "")

# The same session ID is sent with both requests so that they share a session.
session_id = str(uuid4())

# Browser request: starts the session.
browser_query = {
    "url": "https://toscrape.com/",
    "browserHtml": True,
    "session": {"id": session_id},
}
browser_response = requests.post(API_URL, auth=API_AUTH, json=browser_query)

# HTTP request: reuses the session for a different URL of the same website.
http_query = {
    "url": "https://books.toscrape.com/",
    "httpResponseBody": True,
    "session": {"id": session_id},
}
http_response = requests.post(API_URL, auth=API_AUTH, json=http_query)

# httpResponseBody is Base64-encoded; decode it before printing.
encoded_body = http_response.json()["httpResponseBody"]
print(b64decode(encoded_body).decode())
import asyncio
from base64 import b64decode
from uuid import uuid4

from zyte_api import AsyncZyteAPI


async def main():
    """Start a session with a browser request and reuse it in an HTTP request."""
    client = AsyncZyteAPI()
    # The same session ID is sent with both requests so that they share a session.
    session_id = str(uuid4())

    # Browser request: starts the session.
    browser_query = {
        "url": "https://toscrape.com/",
        "browserHtml": True,
        "session": {"id": session_id},
    }
    browser_response = await client.get(browser_query)

    # HTTP request: reuses the session for a different URL of the same website.
    http_query = {
        "url": "https://books.toscrape.com/",
        "httpResponseBody": True,
        "session": {"id": session_id},
    }
    http_response = await client.get(http_query)

    # httpResponseBody is Base64-encoded; decode it before printing.
    encoded_body = http_response["httpResponseBody"]
    print(b64decode(encoded_body).decode())


asyncio.run(main())
from uuid import uuid4

from scrapy import Request, Spider


class ToScrapeComSpider(Spider):
    """Start a session with a browser request and reuse it in a follow-up request."""

    name = "toscrape_com"

    def start_requests(self):
        # The session ID is handed to the callback so the follow-up request can reuse it.
        session_id = str(uuid4())
        browser_params = {
            "browserHtml": True,
            "session": {"id": session_id},
        }
        yield Request(
            "https://toscrape.com/",
            meta={"zyte_api_automap": browser_params},
            cb_kwargs={"session_id": session_id},
            callback=self.parse_browser,
        )

    def parse_browser(self, response, session_id):
        # Follow-up request to a different URL of the same website, in the same session.
        http_params = {"session": {"id": session_id}}
        yield response.follow(
            "https://books.toscrape.com/",
            meta={"zyte_api_automap": http_params},
            callback=self.parse_http,
        )

    def parse_http(self, response):
        print(response.text)

Server-managed sessions#

Warning

Pricing-wise, requests that do not reuse a previous session and use sessionContextParameters.actions count as browser requests, including action costs.

Note

The proxy mode does not support server-managed sessions.

Session contexts let you request a server-managed session and define prerequisites for it.

To assign a session context to a request, set the sessionContext and sessionContextParameters request fields.

Every request that you send with the same value in sessionContext will use a session that was initialized with sessionContextParameters. All those requests should also always include the sessionContextParameters request field with the same value.

Zyte API handles creation, reuse, and deletion of sessions requested through sessionContext, meaning:

Example 1: Set a cookie on all sessions

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using System.Xml.XPath;
using HtmlAgilityPack;

// Requests a server-managed session whose setup visits a URL that sets a
// cookie, then prints the decoded body returned for the cookies URL.
var decompressingHandler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All
};
var client = new HttpClient(decompressingHandler);

// Basic auth: the API key is the user name and the password is empty.
string apiKey = "YOUR_API_KEY";
byte[] credentials = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
client.DefaultRequestHeaders.Add("Authorization", "Basic " + System.Convert.ToBase64String(credentials));

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Name/value pairs that identify the session context.
var sessionContext = new List<Dictionary<string, string>>
{
    new Dictionary<string, string>
    {
        ["name"] = "id",
        ["value"] = "cookies"
    }
};

// Actions sent as sessionContextParameters: visit a URL that sets the cookie.
var sessionSetupActions = new List<Dictionary<string, object>>
{
    new Dictionary<string, object>
    {
        ["action"] = "goto",
        ["url"] = "http://httpbin.org/cookies/set/foo/bar"
    }
};

var requestFields = new Dictionary<string, object>
{
    ["url"] = "http://httpbin.org/cookies",
    ["httpResponseBody"] = true,
    ["sessionContext"] = sessionContext,
    ["sessionContextParameters"] = new Dictionary<string, object>
    {
        ["actions"] = sessionSetupActions
    }
};
var requestJson = JsonSerializer.Serialize(requestFields);
var requestContent = new StringContent(requestJson, Encoding.UTF8, "application/json");

HttpResponseMessage apiResponse = await client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
byte[] apiResponseBytes = await apiResponse.Content.ReadAsByteArrayAsync();

// httpResponseBody is Base64-encoded; decode it before printing.
var apiResponseJson = JsonDocument.Parse(apiResponseBytes);
string encodedBody = apiResponseJson.RootElement.GetProperty("httpResponseBody").ToString();
byte[] decodedBytes = System.Convert.FromBase64String(encodedBody);

Console.WriteLine(System.Text.Encoding.UTF8.GetString(decodedBytes));
input.jsonl#
{"url": "http://httpbin.org/cookies", "httpResponseBody": true, "sessionContext": [{"name": "id", "value": "cookies"}], "sessionContextParameters": {"actions": [{"action": "goto", "url": "http://httpbin.org/cookies/set/foo/bar"}]}}
# Send the request described in input.jsonl with the zyte-api command, pick the
# base64-encoded httpResponseBody field out of the result with jq, and decode it.
zyte-api input.jsonl \
    | jq --raw-output .httpResponseBody \
    | base64 --decode
input.json#
{
    "url": "http://httpbin.org/cookies",
    "httpResponseBody": true,
    "sessionContext": [
        {
            "name": "id",
            "value": "cookies"
        }
    ],
    "sessionContextParameters": {
        "actions": [
            {
                "action": "goto",
                "url": "http://httpbin.org/cookies/set/foo/bar"
            }
        ]
    }
}
# POST input.json to Zyte API (API key as the Basic auth user name, empty
# password), then extract the base64-encoded httpResponseBody field with jq
# and decode it.
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
    | jq --raw-output .httpResponseBody \
    | base64 --decode
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

class Example {

  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Requests http://httpbin.org/cookies through Zyte API within a session whose parameters
   * include a "goto" action for a cookie-setting URL, and prints the decoded response body.
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, String> sessionId = ImmutableMap.of("name", "id", "value", "cookies");
    Map<String, String> gotoAction =
        ImmutableMap.of("action", "goto", "url", "http://httpbin.org/cookies/set/foo/bar");
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "http://httpbin.org/cookies",
            "httpResponseBody",
            true,
            "sessionContext",
            ImmutableList.of(sessionId),
            "sessionContextParameters",
            ImmutableMap.of("actions", ImmutableList.of(gotoAction)));

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(new Gson().toJson(parameters)));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      String apiResponse = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
      JsonObject apiJson = JsonParser.parseString(apiResponse).getAsJsonObject();
      // httpResponseBody is base64-encoded in the API response.
      byte[] decodedBody = Base64.getDecoder().decode(apiJson.get("httpResponseBody").getAsString());
      System.out.println(new String(decodedBody, StandardCharsets.UTF_8));
    }
  }

  /** Builds the HTTP Basic Authorization header: API key as user name, empty password. */
  private static String buildAuthHeader() {
    byte[] credentials = (API_KEY + ":").getBytes();
    return "Basic " + Base64.getEncoder().encodeToString(credentials);
  }
}
const axios = require('axios')

// Zyte API request: fetch /cookies within a session whose parameters include
// a "goto" action for a URL that sets the cookie foo=bar.
const payload = {
  url: 'http://httpbin.org/cookies',
  httpResponseBody: true,
  sessionContext: [{ name: 'id', value: 'cookies' }],
  sessionContextParameters: {
    actions: [{ action: 'goto', url: 'http://httpbin.org/cookies/set/foo/bar' }]
  }
}

async function main () {
  const response = await axios.post(
    'https://api.zyte.com/v1/extract',
    payload,
    { auth: { username: 'YOUR_API_KEY' } }
  )
  // httpResponseBody is base64-encoded in the API response.
  const decodedBody = Buffer.from(response.data.httpResponseBody, 'base64')
  console.log(decodedBody.toString())
}

main()
<?php

// Zyte API request: fetch /cookies within a session whose parameters include
// a "goto" action for a URL that sets the cookie foo=bar.
$payload = [
    'url' => 'http://httpbin.org/cookies',
    'httpResponseBody' => true,
    'sessionContext' => [
        ['name' => 'id', 'value' => 'cookies'],
    ],
    'sessionContextParameters' => [
        'actions' => [
            ['action' => 'goto', 'url' => 'http://httpbin.org/cookies/set/foo/bar'],
        ],
    ],
];

$client = new GuzzleHttp\Client();
$api_response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $payload,
]);

// httpResponseBody is base64-encoded in the API response.
$decoded = json_decode($api_response->getBody());
echo base64_decode($decoded->httpResponseBody)."\n";
from base64 import b64decode

import requests

# Zyte API request: fetch /cookies within a session whose parameters include
# a "goto" action for a URL that sets the cookie foo=bar.
payload = {
    "url": "http://httpbin.org/cookies",
    "httpResponseBody": True,
    "sessionContext": [{"name": "id", "value": "cookies"}],
    "sessionContextParameters": {
        "actions": [
            {"action": "goto", "url": "http://httpbin.org/cookies/set/foo/bar"},
        ],
    },
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=payload,
)
# httpResponseBody is base64-encoded in the API response.
encoded_body = api_response.json()["httpResponseBody"]
print(b64decode(encoded_body).decode())
import asyncio
from base64 import b64decode

from zyte_api import AsyncZyteAPI


async def main():
    """Fetch /cookies through Zyte API in a cookie-setting session and print the body."""
    query = {
        "url": "http://httpbin.org/cookies",
        "httpResponseBody": True,
        "sessionContext": [{"name": "id", "value": "cookies"}],
        "sessionContextParameters": {
            "actions": [
                {"action": "goto", "url": "http://httpbin.org/cookies/set/foo/bar"},
            ],
        },
    }
    client = AsyncZyteAPI()
    api_response = await client.get(query)
    # httpResponseBody is base64-encoded in the API response.
    encoded_body = api_response["httpResponseBody"]
    print(b64decode(encoded_body).decode())


asyncio.run(main())

Tip

scrapy-zyte-api also provides its own session management API, similar to that of server-managed sessions, but built on top of client-managed sessions.

from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Requests httpbin.org/cookies with session parameters and prints the response."""

    name = "httpbin_org"

    def start_requests(self):
        # Zyte API parameters passed through the zyte_api_automap request meta key.
        session_context = [{"name": "id", "value": "cookies"}]
        session_actions = [
            {"action": "goto", "url": "http://httpbin.org/cookies/set/foo/bar"},
        ]
        zyte_api_params = {
            "sessionContext": session_context,
            "sessionContextParameters": {"actions": session_actions},
        }
        yield Request(
            "http://httpbin.org/cookies",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        # Print the response body text.
        print(response.text)

Output:

{
  "cookies": {
    "foo": "bar"
  }
}
Example 2: Start sessions on a browser, use them in HTTP requests

Set a no-op action in sessionContextParameters to force sessions to be started with a browser request, while the requests that use those sessions remain HTTP requests.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using System.Xml.XPath;
using HtmlAgilityPack;

// Fetch https://toscrape.com/ as an HTTP request through Zyte API, using a
// session whose parameters contain a zero-timeout waitForTimeout action, and
// print the decoded response body.
var handler = new HttpClientHandler { AutomaticDecompression = DecompressionMethods.All };
var client = new HttpClient(handler);

// HTTP Basic auth: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var credentials = System.Convert.ToBase64String(
    Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":"));
client.DefaultRequestHeaders.Add("Authorization", $"Basic {credentials}");
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Identifies the session context the request belongs to.
var sessionContext = new List<Dictionary<string, string>>
{
    new Dictionary<string, string> { ["name"] = "id", ["value"] = "browser" }
};

// A wait of 0 does nothing by itself; it is the only action sent.
var sessionActions = new List<Dictionary<string, object>>
{
    new Dictionary<string, object>
    {
        ["action"] = "waitForTimeout",
        ["timeout"] = 0
    }
};

var payload = new Dictionary<string, object>
{
    ["url"] = "https://toscrape.com/",
    ["httpResponseBody"] = true,
    ["sessionContext"] = sessionContext,
    ["sessionContextParameters"] = new Dictionary<string, object> { ["actions"] = sessionActions }
};
var requestContent = new StringContent(
    JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json");

var apiResponse = await client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
var apiResponseBytes = await apiResponse.Content.ReadAsByteArrayAsync();

// httpResponseBody arrives base64-encoded; decode it and read it as UTF-8 text.
var document = JsonDocument.Parse(apiResponseBytes);
var encodedBody = document.RootElement.GetProperty("httpResponseBody").ToString();
var decodedBody = Encoding.UTF8.GetString(System.Convert.FromBase64String(encodedBody));

Console.WriteLine(decodedBody);
input.jsonl#
{"url": "https://toscrape.com/", "httpResponseBody": true, "sessionContext": [{"name": "id", "value": "browser"}], "sessionContextParameters": {"actions": [{"action": "waitForTimeout", "timeout": 0}]}}
# Send the request described in input.jsonl with the zyte-api command, pick the
# base64-encoded httpResponseBody field out of the result with jq, and decode it.
zyte-api input.jsonl \
    | jq --raw-output .httpResponseBody \
    | base64 --decode
input.json#
{
    "url": "https://toscrape.com/",
    "httpResponseBody": true,
    "sessionContext": [
        {
            "name": "id",
            "value": "browser"
        }
    ],
    "sessionContextParameters": {
        "actions": [
            {
                "action": "waitForTimeout",
                "timeout": 0
            }
        ]
    }
}
# POST input.json to Zyte API (API key as the Basic auth user name, empty
# password), then extract the base64-encoded httpResponseBody field with jq
# and decode it.
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
    | jq --raw-output .httpResponseBody \
    | base64 --decode
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

class Example {

  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Requests https://toscrape.com/ through Zyte API as an HTTP request, within a session whose
   * parameters contain a zero-timeout waitForTimeout action, and prints the decoded body.
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, String> sessionId = ImmutableMap.of("name", "id", "value", "browser");
    // A wait of 0 does nothing by itself; it is the only action sent.
    Map<String, Object> noOpAction = ImmutableMap.of("action", "waitForTimeout", "timeout", 0);
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://toscrape.com/",
            "httpResponseBody",
            true,
            "sessionContext",
            ImmutableList.of(sessionId),
            "sessionContextParameters",
            ImmutableMap.of("actions", ImmutableList.of(noOpAction)));

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(new Gson().toJson(parameters)));

    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(request)) {
      String apiResponse = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
      JsonObject apiJson = JsonParser.parseString(apiResponse).getAsJsonObject();
      // httpResponseBody is base64-encoded in the API response.
      byte[] decodedBody = Base64.getDecoder().decode(apiJson.get("httpResponseBody").getAsString());
      System.out.println(new String(decodedBody, StandardCharsets.UTF_8));
    }
  }

  /** Builds the HTTP Basic Authorization header: API key as user name, empty password. */
  private static String buildAuthHeader() {
    byte[] credentials = (API_KEY + ":").getBytes();
    return "Basic " + Base64.getEncoder().encodeToString(credentials);
  }
}
const axios = require('axios')

// Zyte API request: fetch toscrape.com as an HTTP request, within a session
// whose parameters contain a zero-timeout waitForTimeout action.
const payload = {
  url: 'https://toscrape.com/',
  httpResponseBody: true,
  sessionContext: [{ name: 'id', value: 'browser' }],
  sessionContextParameters: {
    actions: [{ action: 'waitForTimeout', timeout: 0 }]
  }
}

async function main () {
  const response = await axios.post(
    'https://api.zyte.com/v1/extract',
    payload,
    { auth: { username: 'YOUR_API_KEY' } }
  )
  // httpResponseBody is base64-encoded in the API response.
  const decodedBody = Buffer.from(response.data.httpResponseBody, 'base64')
  console.log(decodedBody.toString())
}

main()
<?php

// Zyte API request: fetch toscrape.com as an HTTP request, within a session
// whose parameters contain a zero-timeout waitForTimeout action.
$payload = [
    'url' => 'https://toscrape.com/',
    'httpResponseBody' => true,
    'sessionContext' => [
        ['name' => 'id', 'value' => 'browser'],
    ],
    'sessionContextParameters' => [
        'actions' => [
            ['action' => 'waitForTimeout', 'timeout' => 0],
        ],
    ],
];

$client = new GuzzleHttp\Client();
$api_response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $payload,
]);

// httpResponseBody is base64-encoded in the API response.
$decoded = json_decode($api_response->getBody());
echo base64_decode($decoded->httpResponseBody)."\n";
from base64 import b64decode

import requests

# Zyte API request: fetch toscrape.com as an HTTP request, within a session
# whose parameters contain a zero-timeout waitForTimeout action.
payload = {
    "url": "https://toscrape.com/",
    "httpResponseBody": True,
    "sessionContext": [{"name": "id", "value": "browser"}],
    "sessionContextParameters": {
        "actions": [{"action": "waitForTimeout", "timeout": 0}],
    },
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=payload,
)
# httpResponseBody is base64-encoded in the API response.
encoded_body = api_response.json()["httpResponseBody"]
print(b64decode(encoded_body).decode())
import asyncio
from base64 import b64decode

from zyte_api import AsyncZyteAPI


async def main():
    """Fetch toscrape.com through Zyte API in a session with a no-op action; print the body."""
    query = {
        "url": "https://toscrape.com/",
        "httpResponseBody": True,
        "sessionContext": [{"name": "id", "value": "browser"}],
        "sessionContextParameters": {
            "actions": [{"action": "waitForTimeout", "timeout": 0}],
        },
    }
    client = AsyncZyteAPI()
    api_response = await client.get(query)
    # httpResponseBody is base64-encoded in the API response.
    decoded_bytes = b64decode(api_response["httpResponseBody"])
    print(decoded_bytes.decode())


asyncio.run(main())
from scrapy import Request, Spider


class HTTPBinOrgSpider(Spider):
    """Requests toscrape.com with session parameters and prints the response text."""

    # NOTE(review): the class and spider names refer to httpbin.org although
    # the request targets toscrape.com; this looks like a copy-paste leftover.
    name = "httpbin_org"

    def start_requests(self):
        # Zyte API parameters passed through the zyte_api_automap request meta key.
        session_context = [{"name": "id", "value": "browser"}]
        # A wait of 0 does nothing by itself; it is the only action sent.
        session_actions = [{"action": "waitForTimeout", "timeout": 0}]
        zyte_api_params = {
            "sessionContext": session_context,
            "sessionContextParameters": {"actions": session_actions},
        }
        yield Request(
            "https://toscrape.com/",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        # Print the response body text.
        print(response.text)

Session IP addresses#

Requests using the same session will normally share the same IP address.

This may not be the case, though, in the following scenarios:

  • If Zyte API is using a residential IP address for a session, and that IP address expires, new requests using the same session will get a different IP address.

    The new IP address will be in the same country as the original IP address.

  • When using client-managed sessions, if you send 2 or more requests in parallel with the same session ID, and the session does not exist already, each request may get a different IP address.

    You should create sessions with a single request and, once you get a response, you can send as many parallel requests as you want with that session.

While requests in the same session are almost guaranteed to use the same IP address, requests from different sessions are not guaranteed to have different IP addresses, although they often will.

Response headers#

Set the httpResponseHeaders request field to true to get HTTP response headers in the httpResponseHeaders response field.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Request only the response headers of https://toscrape.com through Zyte API
// and collect them into a name -> value dictionary.
HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

// HTTP Basic auth: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "https://toscrape.com"},
    {"httpResponseHeaders", true}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

var data = JsonDocument.Parse(body);

// httpResponseHeaders is an array of {name, value} objects, so the same header
// name may appear more than once. Dictionary.Add would throw ArgumentException
// on a repeated name; instead, repeated values are joined with ", ".
// Header names are compared case-insensitively.
var headers = new Dictionary<string, string>(System.StringComparer.OrdinalIgnoreCase);
foreach (var header in data.RootElement.GetProperty("httpResponseHeaders").EnumerateArray())
{
    var name = header.GetProperty("name").ToString();
    var value = header.GetProperty("value").ToString();
    headers[name] = headers.TryGetValue(name, out var existing)
        ? existing + ", " + value
        : value;
}
input.jsonl#
{"url": "https://toscrape.com", "httpResponseHeaders": true}
# Send the request described in input.jsonl with the zyte-api command and
# print the httpResponseHeaders field of the result.
zyte-api input.jsonl \
    | jq .httpResponseHeaders
input.json#
{
    "url": "https://toscrape.com",
    "httpResponseHeaders": true
}
# POST input.json to Zyte API (API key as the Basic auth user name, empty
# password) and print the httpResponseHeaders field of the JSON response.
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
    | jq .httpResponseHeaders
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Requests the response headers of https://toscrape.com through Zyte API and reads the
   * httpResponseHeaders array from the JSON response.
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    // Only httpResponseHeaders is requested, matching the input.json shown for this example.
    Map<String, Object> parameters =
        ImmutableMap.of("url", "https://toscrape.com", "httpResponseHeaders", true);
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // Array of header objects returned under httpResponseHeaders.
        JsonArray httpResponseHeaders = jsonObject.get("httpResponseHeaders").getAsJsonArray();
      }
    }
  }

  /** Builds the HTTP Basic Authorization header: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Request only the response headers of toscrape.com through Zyte API.
const payload = { url: 'https://toscrape.com', httpResponseHeaders: true }

async function main () {
  const response = await axios.post(
    'https://api.zyte.com/v1/extract',
    payload,
    { auth: { username: 'YOUR_API_KEY' } }
  )
  const { httpResponseHeaders } = response.data
}

main()
<?php

// Request only the response headers of toscrape.com through Zyte API.
$payload = [
    'url' => 'https://toscrape.com',
    'httpResponseHeaders' => true,
];

$client = new GuzzleHttp\Client();
$api_response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $payload,
]);
$http_response_headers = json_decode($api_response->getBody())->httpResponseHeaders;

In proxy mode, response headers are always included in the HTTP response, so there is no need to ask for them explicitly.

import requests

# Request only the response headers of toscrape.com through Zyte API.
payload = {"url": "https://toscrape.com", "httpResponseHeaders": True}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=payload,
)
api_result = api_response.json()
http_response_headers = api_result["httpResponseHeaders"]
import asyncio
import json

from zyte_api import AsyncZyteAPI


async def main():
    """Request the response headers of toscrape.com and print them as indented JSON."""
    query = {"url": "https://toscrape.com", "httpResponseHeaders": True}
    client = AsyncZyteAPI()
    api_response = await client.get(query)
    headers_json = json.dumps(api_response["httpResponseHeaders"], indent=2)
    print(headers_json)


asyncio.run(main())
from scrapy import Request, Spider


class ToScrapeComSpider(Spider):
    """Requests toscrape.com asking for response headers but not the response body."""

    name = "toscrape_com"

    def start_requests(self):
        # Zyte API parameters passed through the zyte_api_automap request meta key.
        zyte_api_params = {
            "httpResponseBody": False,
            "httpResponseHeaders": True,
        }
        yield Request(
            "https://toscrape.com",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        # The headers are read from the Scrapy response object.
        headers = response.headers

Note

In transparent mode, httpResponseHeaders is sent by default for httpResponseBody requests, but sending it explicitly is still recommended, as future versions of scrapy-zyte-api may stop sending it by default.

Output (first 5 lines):

[
  {
    "name": "date",
    "value": "Fri, 25 Aug 2023 07:08:05 GMT"
  },

Note

Reading cookies from Set-Cookie response headers is not recommended, because those headers only contain the cookies set by the final response; they do not account for cookies set during redirection or during browser rendering. Use responseCookies instead, as described in Cookies.

Metadata#

Set the echoData request field to an arbitrary value, to get that value verbatim in the echoData response field.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Send one Zyte API request per (url, echoData) pair in parallel and collect
// the HTTP responses in the order in which they complete.
var inputData = new List<List<object>>
{
    new List<object> { "https://toscrape.com", 1 },
    new List<object> { "https://books.toscrape.com", 2 },
    new List<object> { "https://quotes.toscrape.com", 3 },
};
var output = new List<HttpResponseMessage>();

var handler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All,
    MaxConnectionsPerServer = 15
};
var client = new HttpClient(handler);

// HTTP Basic auth: the API key is the user name and the password is empty.
var apiKey = "YOUR_API_KEY";
var credentials = System.Convert.ToBase64String(
    Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":"));
client.DefaultRequestHeaders.Add("Authorization", $"Basic {credentials}");
client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Start every request up front; ToList() forces the tasks to be created now.
var pendingTasks = inputData
    .Select(entry =>
    {
        var payload = new Dictionary<string, object>
        {
            ["url"] = entry[0],
            ["browserHtml"] = true,
            ["echoData"] = entry[1]
        };
        var requestContent = new StringContent(
            JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json");
        return client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
    })
    .ToList();

// Drain the tasks one at a time as each finishes.
while (pendingTasks.Count > 0)
{
    var finishedTask = await Task.WhenAny(pendingTasks);
    pendingTasks.Remove(finishedTask);
    output.Add(await finishedTask);
}
input.jsonl#
{"url": "https://toscrape.com", "browserHtml": true, "echoData": 1}
{"url": "https://books.toscrape.com", "browserHtml": true, "echoData": 2}
{"url": "https://quotes.toscrape.com", "browserHtml": true, "echoData": 3}
# Send the requests listed in input.jsonl with the connection count set to 15
# and write the results to output.jsonl.
zyte-api --n-conn 15 input.jsonl -o output.jsonl
input.jsonl#
{"url": "https://toscrape.com", "browserHtml": true, "echoData": 1}
{"url": "https://books.toscrape.com", "browserHtml": true, "echoData": 2}
{"url": "https://quotes.toscrape.com", "browserHtml": true, "echoData": 3}
# Send each line of input.jsonl as the body of its own request, running up to
# 15 curl processes in parallel (xargs -P 15), and append the echoData value
# of every response to output.jsonl.
# The bash -c script is double-quoted, so $ZYTE_API_KEY is expanded by the
# outer shell, while \$0 (the input line passed by xargs) and \$1 (awk's first
# field) are escaped so that they reach the inner bash and awk unexpanded.
cat input.jsonl \
    | xargs -P 15 -d\\n -n 1 \
    bash -c "
        curl \
            --user $ZYTE_API_KEY: \
            --header 'Content-Type: application/json' \
            --data \"\$0\" \
            --compressed \
            https://api.zyte.com/v1/extract \
        | jq .echoData \
        | awk '{print \$1}' \
        >> output.jsonl
"
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.hc.client5.http.async.methods.SimpleHttpRequest;
import org.apache.hc.client5.http.async.methods.SimpleHttpResponse;
import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient;
import org.apache.hc.client5.http.impl.async.HttpAsyncClients;
import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManager;
import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder;
import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder;
import org.apache.hc.core5.concurrent.FutureCallback;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.nio.ssl.TlsStrategy;
import org.apache.hc.core5.reactor.ssl.TlsDetails;

class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws ExecutionException, InterruptedException, IOException, ParseException {

    Object[][] input = {
      {"https://toscrape.com", 1},
      {"https://bookstoscrape.com", 2},
      {"https://quotes.toscrape.com", 3}
    };
    List<Future> futures = new ArrayList<Future>();
    List<String> output = new ArrayList<String>();

    int concurrency = 15;

    // https://issues.apache.org/jira/browse/HTTPCLIENT-2219
    final TlsStrategy tlsStrategy =
        ClientTlsStrategyBuilder.create()
            .useSystemProperties()
            .setTlsDetailsFactory(
                sslEngine ->
                    new TlsDetails(sslEngine.getSession(), sslEngine.getApplicationProtocol()))
            .build();

    PoolingAsyncClientConnectionManager connectionManager =
        PoolingAsyncClientConnectionManagerBuilder.create().setTlsStrategy(tlsStrategy).build();
    connectionManager.setMaxTotal(concurrency);
    connectionManager.setDefaultMaxPerRoute(concurrency);

    CloseableHttpAsyncClient client =
        HttpAsyncClients.custom().setConnectionManager(connectionManager).build();
    try {
      client.start();
      for (int i = 0; i < input.length; i++) {
        Map<String, Object> parameters =
            ImmutableMap.of("url", input[i][0], "browserHtml", true, "echoData", input[i][1]);
        String requestBody = new Gson().toJson(parameters);

        SimpleHttpRequest request =
            new SimpleHttpRequest("POST", "https://api.zyte.com/v1/extract");
        request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
        request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
        request.setBody(requestBody, ContentType.APPLICATION_JSON);

        final Future<SimpleHttpResponse> future =
            client.execute(
                request,
                new FutureCallback<SimpleHttpResponse>() {
                  public void completed(final SimpleHttpResponse response) {
                    String apiResponse = response.getBodyText();
                    output.add(apiResponse);
                  }

                  public void failed(final Exception ex) {}

                  public void cancelled() {}
                });
        futures.add(future);
      }
      for (int i = 0; i < futures.size(); i++) {
        futures.get(i).get();
      }
    } finally {
      client.close();
    }
  }

  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const { ConcurrencyManager } = require('axios-concurrency')
const axios = require('axios')

// Each entry is [url, index]; the index is sent as echoData with the request.
const urls = [
  ['https://toscrape.com', 1],
  ['https://books.toscrape.com', 2],
  ['https://quotes.toscrape.com', 3]
]

// Receives the body (response.data) of every completed request.
const output = []

// Shared axios instance, registered with ConcurrencyManager using a limit of 15.
const client = axios.create()
ConcurrencyManager(client, 15)

// Sends one extract request for a [url, index] pair and stores the response data.
const extract = ([url, index]) => {
  const payload = { url, browserHtml: true, echoData: index }
  const config = { auth: { username: 'YOUR_API_KEY' } }
  return client
    .post('https://api.zyte.com/v1/extract', payload, config)
    .then(({ data }) => output.push(data))
}

Promise.all(urls.map((entry) => extract(entry)))
<?php

// Each entry is [url, index]; the index is sent as echoData with the request.
$input = [
    ['https://toscrape.com', 1],
    ['https://books.toscrape.com', 2],
    ['https://quotes.toscrape.com', 3],
];
// Receives the JSON-decoded body of every response.
$output = [];
// One promise per request sent, waited on at the end of the script.
$promises = [];

$client = new GuzzleHttp\Client();

foreach ($input as $url_and_index) {
    $options = [
        'auth' => ['YOUR_API_KEY', ''],
        'headers' => ['Accept-Encoding' => 'gzip'],
        'json' => [
            'url' => $url_and_index[0],
            'browserHtml' => true,
            'echoData' => $url_and_index[1],
        ],
    ];
    $request = new \GuzzleHttp\Psr7\Request('POST', 'https://api.zyte.com/v1/extract');
    // $output is captured by reference rather than through `global`, so the
    // callback writes to the variable declared above even when this code does
    // not run at global scope (for example, when included from a function).
    $promises[] = $client->sendAsync($request, $options)->then(
        function ($response) use (&$output) {
            $output[] = json_decode($response->getBody());
        }
    );
}

// Block until every request has completed.
foreach ($promises as $promise) {
    $promise->wait();
}
import asyncio

import aiohttp

# (url, index) pairs; main() passes each pair to extract(), which sends the
# index as the echoData field of the request.
input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]
# Filled by extract() with the parsed JSON body of each response.
output = []


async def extract(client, url, index):
    """POST one extract request and append the parsed JSON response to ``output``.

    Args:
        client: Object whose awaitable ``post`` method sends the request
            (``main`` passes an ``aiohttp.ClientSession``).
        url: Target URL, sent as the ``url`` field of the request body.
        index: Value sent as the ``echoData`` field of the request body.
    """
    payload = {"url": url, "browserHtml": True, "echoData": index}
    credentials = aiohttp.BasicAuth("YOUR_API_KEY")
    reply = await client.post(
        "https://api.zyte.com/v1/extract",
        json=payload,
        auth=credentials,
    )
    parsed = await reply.json()
    output.append(parsed)


async def main():
    """Run ``extract`` for every ``input_data`` entry over one shared session.

    The session uses a ``TCPConnector`` created with ``limit_per_host=15``, and
    all ``extract`` coroutines are awaited together through ``asyncio.gather``.
    """
    connector = aiohttp.TCPConnector(limit_per_host=15)
    async with aiohttp.ClientSession(connector=connector) as client:
        pending = [extract(client, url, index) for url, index in input_data]
        await asyncio.gather(*pending)


asyncio.run(main())
import asyncio
import json

from zyte_api import AsyncZyteAPI

# (url, index) pairs; main() turns each pair into a query whose echoData
# field is the index.
input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]


async def main():
    """Send one ``browserHtml`` query per ``input_data`` entry and print each response.

    Every query carries its entry's index as ``echoData``. Each awaited result
    of ``session.iter`` is printed as a JSON string.
    """
    client = AsyncZyteAPI(n_conn=15)
    queries = []
    for url, index in input_data:
        queries.append({"url": url, "browserHtml": True, "echoData": index})
    async with client.session() as session:
        for pending in session.iter(queries):
            print(json.dumps(await pending))


asyncio.run(main())
from scrapy import Request, Spider

# (url, index) pairs; the spider requests each URL and sends the index as the
# echoData parameter of that request.
input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]


class ToScrapeSpider(Spider):
    """Spider that requests every ``input_data`` URL with ``browserHtml`` enabled.

    Each request carries its index as ``echoData`` inside the
    ``zyte_api_automap`` request metadata; ``parse`` reads the value back from
    the raw API response.
    """

    name = "toscrape_com"

    # Both concurrency settings are raised to 15.
    custom_settings = dict(
        CONCURRENT_REQUESTS=15,
        CONCURRENT_REQUESTS_PER_DOMAIN=15,
    )

    def start_requests(self):
        """Yield one request per ``(url, index)`` pair in ``input_data``."""
        for target, position in input_data:
            api_params = {"browserHtml": True, "echoData": position}
            yield Request(target, meta={"zyte_api_automap": api_params})

    def parse(self, response):
        """Yield an item holding the echoed index and the response text."""
        echoed = response.raw_api_response["echoData"]
        yield {"index": echoed, "html": response.text}

Alternatively, you can use Scrapy’s Request.cb_kwargs directly for a similar purpose:


    def start_requests(self):
        """Yield one request per ``(url, index)`` pair in ``input_data``.

        The index is handed to the callback through ``cb_kwargs`` rather than
        being sent as part of the ``zyte_api_automap`` parameters.
        """
        for target, position in input_data:
            callback_args = {"index": position}
            request_meta = {"zyte_api_automap": {"browserHtml": True}}
            yield Request(target, cb_kwargs=callback_args, meta=request_meta)

    def parse(self, response, index):
        """Yield a single item pairing ``index`` with the response text.

        Args:
            response: Object whose ``text`` attribute is stored under ``html``.
            index: Value stored under ``index``.
        """
        item = {"index": index, "html": response.text}
        yield item

Output:

{"url": "https://quotes.toscrape.com/", "statusCode": 200, "browserHtml": "<!DOCTYPE html><html lang=\"en\"><head>\n\t<meta charset=\"UTF-8\">\n\t<title>Quotes to Scrape</title>\n    <link rel=\"stylesheet\" href=\"/static/bootstrap.min.css\">\n    <link rel=\"stylesheet\" href=\"/static/main.css\">\n</head>\n<body>\n    <div class=\"container\">\n        <div class=\"row header-box\">\n            <div class=\"col-md-8\">\n                <h1>\n                    <a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>\n                </h1>\n            </div>\n            <div class=\"col-md-4\">\n                <p>\n                \n                    <a href=\"/login\">Login</a>\n                \n                </p>\n            </div>\n        </div>\n    \n\n<div class=\"row\">\n    <div class=\"col-md-8\">\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“The world as we have created it is a process of our thinking. 
It cannot be changed without changing our thinking.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Albert Einstein</small>\n        <a href=\"/author/Albert-Einstein\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"change,deep-thoughts,thinking,world\"> \n            \n            <a class=\"tag\" href=\"/tag/change/page/1/\">change</a>\n            \n            <a class=\"tag\" href=\"/tag/deep-thoughts/page/1/\">deep-thoughts</a>\n            \n            <a class=\"tag\" href=\"/tag/thinking/page/1/\">thinking</a>\n            \n            <a class=\"tag\" href=\"/tag/world/page/1/\">world</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“It is our choices, Harry, that show what we truly are, far more than our abilities.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">J.K. Rowling</small>\n        <a href=\"/author/J-K-Rowling\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"abilities,choices\"> \n            \n            <a class=\"tag\" href=\"/tag/abilities/page/1/\">abilities</a>\n            \n            <a class=\"tag\" href=\"/tag/choices/page/1/\">choices</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“There are only two ways to live your life. One is as though nothing is a miracle. 
The other is as though everything is a miracle.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Albert Einstein</small>\n        <a href=\"/author/Albert-Einstein\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"inspirational,life,live,miracle,miracles\"> \n            \n            <a class=\"tag\" href=\"/tag/inspirational/page/1/\">inspirational</a>\n            \n            <a class=\"tag\" href=\"/tag/life/page/1/\">life</a>\n            \n            <a class=\"tag\" href=\"/tag/live/page/1/\">live</a>\n            \n            <a class=\"tag\" href=\"/tag/miracle/page/1/\">miracle</a>\n            \n            <a class=\"tag\" href=\"/tag/miracles/page/1/\">miracles</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Jane Austen</small>\n        <a href=\"/author/Jane-Austen\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"aliteracy,books,classic,humor\"> \n            \n            <a class=\"tag\" href=\"/tag/aliteracy/page/1/\">aliteracy</a>\n            \n            <a class=\"tag\" href=\"/tag/books/page/1/\">books</a>\n            \n            <a class=\"tag\" href=\"/tag/classic/page/1/\">classic</a>\n            \n            <a class=\"tag\" href=\"/tag/humor/page/1/\">humor</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“Imperfection is beauty, madness is genius and it's better to be 
absolutely ridiculous than absolutely boring.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Marilyn Monroe</small>\n        <a href=\"/author/Marilyn-Monroe\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"be-yourself,inspirational\"> \n            \n            <a class=\"tag\" href=\"/tag/be-yourself/page/1/\">be-yourself</a>\n            \n            <a class=\"tag\" href=\"/tag/inspirational/page/1/\">inspirational</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“Try not to become a man of success. Rather become a man of value.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Albert Einstein</small>\n        <a href=\"/author/Albert-Einstein\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"adulthood,success,value\"> \n            \n            <a class=\"tag\" href=\"/tag/adulthood/page/1/\">adulthood</a>\n            \n            <a class=\"tag\" href=\"/tag/success/page/1/\">success</a>\n            \n            <a class=\"tag\" href=\"/tag/value/page/1/\">value</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“It is better to be hated for what you are than to be loved for what you are not.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">André Gide</small>\n        <a href=\"/author/Andre-Gide\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"life,love\"> \n            \n            <a class=\"tag\" 
href=\"/tag/life/page/1/\">life</a>\n            \n            <a class=\"tag\" href=\"/tag/love/page/1/\">love</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“I have not failed. I've just found 10,000 ways that won't work.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Thomas A. Edison</small>\n        <a href=\"/author/Thomas-A-Edison\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"edison,failure,inspirational,paraphrased\"> \n            \n            <a class=\"tag\" href=\"/tag/edison/page/1/\">edison</a>\n            \n            <a class=\"tag\" href=\"/tag/failure/page/1/\">failure</a>\n            \n            <a class=\"tag\" href=\"/tag/inspirational/page/1/\">inspirational</a>\n            \n            <a class=\"tag\" href=\"/tag/paraphrased/page/1/\">paraphrased</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“A woman is like a tea bag; you never know how strong it is until it's in hot water.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Eleanor Roosevelt</small>\n        <a href=\"/author/Eleanor-Roosevelt\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"misattributed-eleanor-roosevelt\"> \n            \n            <a class=\"tag\" href=\"/tag/misattributed-eleanor-roosevelt/page/1/\">misattributed-eleanor-roosevelt</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“A day without sunshine is like, you know, 
night.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Steve Martin</small>\n        <a href=\"/author/Steve-Martin\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"humor,obvious,simile\"> \n            \n            <a class=\"tag\" href=\"/tag/humor/page/1/\">humor</a>\n            \n            <a class=\"tag\" href=\"/tag/obvious/page/1/\">obvious</a>\n            \n            <a class=\"tag\" href=\"/tag/simile/page/1/\">simile</a>\n            \n        </div>\n    </div>\n\n    <nav>\n        <ul class=\"pager\">\n            \n            \n            <li class=\"next\">\n                <a href=\"/page/2/\">Next <span aria-hidden=\"true\">→</span></a>\n            </li>\n            \n        </ul>\n    </nav>\n    </div>\n    <div class=\"col-md-4 tags-box\">\n        \n            <h2>Top Ten tags</h2>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 28px\" href=\"/tag/love/\">love</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 26px\" href=\"/tag/inspirational/\">inspirational</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 26px\" href=\"/tag/life/\">life</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 24px\" href=\"/tag/humor/\">humor</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 22px\" href=\"/tag/books/\">books</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 14px\" href=\"/tag/reading/\">reading</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            
<a class=\"tag\" style=\"font-size: 10px\" href=\"/tag/friendship/\">friendship</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 8px\" href=\"/tag/friends/\">friends</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 8px\" href=\"/tag/truth/\">truth</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 6px\" href=\"/tag/simile/\">simile</a>\n            </span>\n            \n        \n    </div>\n</div>\n\n    </div>\n    <footer class=\"footer\">\n        <div class=\"container\">\n            <p class=\"text-muted\">\n                Quotes by: <a href=\"https://www.goodreads.com/quotes\">GoodReads.com</a>\n            </p>\n            <p class=\"copyright\">\n                Made with <span class=\"zyte\">❤</span> by <a class=\"zyte\" href=\"https://www.zyte.com\">Zyte</a>\n            </p>\n        </div>\n    </footer>\n\n</body></html>", "echoData": 3}
{"url": "https://books.toscrape.com/", "statusCode": 200, "browserHtml": "<!DOCTYPE html><!--[if lt IE 7]>      <html lang=\"en-us\" class=\"no-js lt-ie9 lt-ie8 lt-ie7\"> <![endif]--><!--[if IE 7]>         <html lang=\"en-us\" class=\"no-js lt-ie9 lt-ie8\"> <![endif]--><!--[if IE 8]>         <html lang=\"en-us\" class=\"no-js lt-ie9\"> <![endif]--><!--[if gt IE 8]><!--><html lang=\"en-us\" class=\"no-js\"><!--<![endif]--><head>\n        <title>\n    All products | Books to Scrape - Sandbox\n</title>\n\n        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">\n        <meta name=\"created\" content=\"24th Jun 2016 09:29\">\n        <meta name=\"description\" content=\"\">\n        <meta name=\"viewport\" content=\"width=device-width\">\n        <meta name=\"robots\" content=\"NOARCHIVE,NOCACHE\">\n\n        <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->\n        <!--[if lt IE 9]>\n        <script src=\"//html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n        <![endif]-->\n\n        \n            <link rel=\"shortcut icon\" href=\"static/oscar/favicon.ico\">\n        \n\n        \n        \n    \n    \n        <link rel=\"stylesheet\" type=\"text/css\" href=\"static/oscar/css/styles.css\">\n    \n    <link rel=\"stylesheet\" href=\"static/oscar/js/bootstrap-datetimepicker/bootstrap-datetimepicker.css\">\n    <link rel=\"stylesheet\" type=\"text/css\" href=\"static/oscar/css/datetimepicker.css\">\n\n\n        \n        \n\n        \n\n        \n            \n            \n\n        \n    </head>\n\n    <body id=\"default\" class=\"default\">\n        \n        \n    \n    \n    <header class=\"header container-fluid\">\n        <div class=\"page_inner\">\n            <div class=\"row\">\n                <div class=\"col-sm-8 h1\"><a href=\"index.html\">Books to Scrape</a><small> We love being scraped!</small>\n</div>\n\n                \n            </div>\n        </div>\n    </header>\n\n    \n    \n<div 
class=\"container-fluid page\">\n    <div class=\"page_inner\">\n        \n    <ul class=\"breadcrumb\">\n        <li>\n            <a href=\"index.html\">Home</a>\n        </li>\n        <li class=\"active\">All products</li>\n    </ul>\n\n        <div class=\"row\">\n\n            <aside class=\"sidebar col-sm-4 col-md-3\">\n                \n                <div id=\"promotions_left\">\n                    \n                </div>\n                \n    \n    \n        \n        <div class=\"side_categories\">\n            <ul class=\"nav nav-list\">\n                \n                    <li>\n                        <a href=\"catalogue/category/books_1/index.html\">\n                            \n                                Books\n                            \n                        </a>\n\n                        <ul>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/travel_2/index.html\">\n                            \n                                Travel\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/mystery_3/index.html\">\n                            \n                                Mystery\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/historical-fiction_4/index.html\">\n                            \n                                Historical Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/sequential-art_5/index.html\">\n                       
     \n                                Sequential Art\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/classics_6/index.html\">\n                            \n                                Classics\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/philosophy_7/index.html\">\n                            \n                                Philosophy\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/romance_8/index.html\">\n                            \n                                Romance\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/womens-fiction_9/index.html\">\n                            \n                                Womens Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/fiction_10/index.html\">\n                            \n                                Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/childrens_11/index.html\">\n                            \n                                Childrens\n         
                   \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/religion_12/index.html\">\n                            \n                                Religion\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/nonfiction_13/index.html\">\n                            \n                                Nonfiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/music_14/index.html\">\n                            \n                                Music\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/default_15/index.html\">\n                            \n                                Default\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/science-fiction_16/index.html\">\n                            \n                                Science Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/sports-and-games_17/index.html\">\n                            \n                                Sports and Games\n                            \n                        
</a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/add-a-comment_18/index.html\">\n                            \n                                Add a comment\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/fantasy_19/index.html\">\n                            \n                                Fantasy\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/new-adult_20/index.html\">\n                            \n                                New Adult\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/young-adult_21/index.html\">\n                            \n                                Young Adult\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/science_22/index.html\">\n                            \n                                Science\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/poetry_23/index.html\">\n                            \n                                Poetry\n                            \n                        </a>\n\n                        </li>\n                        
\n                \n                    <li>\n                        <a href=\"catalogue/category/books/paranormal_24/index.html\">\n                            \n                                Paranormal\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/art_25/index.html\">\n                            \n                                Art\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/psychology_26/index.html\">\n                            \n                                Psychology\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/autobiography_27/index.html\">\n                            \n                                Autobiography\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/parenting_28/index.html\">\n                            \n                                Parenting\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/adult-fiction_29/index.html\">\n                            \n                                Adult Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n        
                <a href=\"catalogue/category/books/humor_30/index.html\">\n                            \n                                Humor\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/horror_31/index.html\">\n                            \n                                Horror\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/history_32/index.html\">\n                            \n                                History\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/food-and-drink_33/index.html\">\n                            \n                                Food and Drink\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/christian-fiction_34/index.html\">\n                            \n                                Christian Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/business_35/index.html\">\n                            \n                                Business\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a 
href=\"catalogue/category/books/biography_36/index.html\">\n                            \n                                Biography\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/thriller_37/index.html\">\n                            \n                                Thriller\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/contemporary_38/index.html\">\n                            \n                                Contemporary\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/spirituality_39/index.html\">\n                            \n                                Spirituality\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/academic_40/index.html\">\n                            \n                                Academic\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/self-help_41/index.html\">\n                            \n                                Self Help\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a 
href=\"catalogue/category/books/historical_42/index.html\">\n                            \n                                Historical\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/christian_43/index.html\">\n                            \n                                Christian\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/suspense_44/index.html\">\n                            \n                                Suspense\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/short-stories_45/index.html\">\n                            \n                                Short Stories\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/novels_46/index.html\">\n                            \n                                Novels\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/health_47/index.html\">\n                            \n                                Health\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/politics_48/index.html\">\n       
                     \n                                Politics\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/cultural_49/index.html\">\n                            \n                                Cultural\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/erotica_50/index.html\">\n                            \n                                Erotica\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/crime_51/index.html\">\n                            \n                                Crime\n                            \n                        </a>\n\n                        </li>\n                        \n                            </ul></li>\n                        \n                \n            </ul>\n        </div>\n    \n    \n\n            </aside>\n\n            <div class=\"col-sm-8 col-md-9\">\n                \n                <div class=\"page-header action\">\n                    <h1>All products</h1>\n                </div>\n                \n\n                \n\n\n\n<div id=\"messages\">\n\n</div>\n\n\n                <div id=\"promotions\">\n                    \n                </div>\n\n                \n    <form method=\"get\" class=\"form-horizontal\">\n        \n        <div style=\"display:none\">\n            \n            \n        </div>\n\n        \n            \n                \n                    <strong>1000</strong> results - showing <strong>1</strong> to <strong>20</strong>.\n                \n            \n       
     \n        \n    </form>\n    \n        <section>\n            <div class=\"alert alert-warning\" role=\"alert\"><strong>Warning!</strong> This is a demo website for web scraping purposes. Prices and ratings here were randomly assigned and have no real meaning.</div>\n\n            <div>\n                <ol class=\"row\">\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/a-light-in-the-attic_1000/index.html\"><img src=\"media/cache/2c/da/2cdad67c44b002e7ead0cc35693c0e8b.jpg\" alt=\"A Light in the Attic\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Three\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/a-light-in-the-attic_1000/index.html\" title=\"A Light in the Attic\">A Light in the ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£51.77</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 
col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/tipping-the-velvet_999/index.html\"><img src=\"media/cache/26/0c/260c6ae16bce31c8f8c95daddd9f4a1c.jpg\" alt=\"Tipping the Velvet\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/tipping-the-velvet_999/index.html\" title=\"Tipping the Velvet\">Tipping the Velvet</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£53.74</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/soumission_998/index.html\"><img src=\"media/cache/3e/ef/3eef99c9d9adef34639f510662022830.jpg\" alt=\"Soumission\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n        
        <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/soumission_998/index.html\" title=\"Soumission\">Soumission</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£50.10</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/sharp-objects_997/index.html\"><img src=\"media/cache/32/51/3251cf3a3412f53f339e42cac2134093.jpg\" alt=\"Sharp Objects\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/sharp-objects_997/index.html\" title=\"Sharp Objects\">Sharp Objects</a></h3>\n        \n\n      
  \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£47.82</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/sapiens-a-brief-history-of-humankind_996/index.html\"><img src=\"media/cache/be/a5/bea5697f2534a2f86a3ef27b5a8c12a6.jpg\" alt=\"Sapiens: A Brief History of Humankind\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/sapiens-a-brief-history-of-humankind_996/index.html\" title=\"Sapiens: A Brief History of Humankind\">Sapiens: A Brief History ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£54.23</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn 
btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-requiem-red_995/index.html\"><img src=\"media/cache/68/33/68339b4c9bc034267e1da611ab3b34f8.jpg\" alt=\"The Requiem Red\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-requiem-red_995/index.html\" title=\"The Requiem Red\">The Requiem Red</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£22.65</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a 
href=\"catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html\"><img src=\"media/cache/92/27/92274a95b7c251fea59a2b8a78275ab4.jpg\" alt=\"The Dirty Little Secrets of Getting Your Dream Job\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html\" title=\"The Dirty Little Secrets of Getting Your Dream Job\">The Dirty Little Secrets ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£33.34</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/index.html\"><img src=\"media/cache/3d/54/3d54940e57e662c4dd1f3ff00c78cc64.jpg\" alt=\"The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull\" 
class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Three\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/index.html\" title=\"The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull\">The Coming Woman: A ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£17.93</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-boys-in-the-boat-nine-americans-and-their-epic-quest-for-gold-at-the-1936-berlin-olympics_992/index.html\"><img src=\"media/cache/66/88/66883b91f6804b2323c8369331cb7dd1.jpg\" alt=\"The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p 
class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-boys-in-the-boat-nine-americans-and-their-epic-quest-for-gold-at-the-1936-berlin-olympics_992/index.html\" title=\"The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics\">The Boys in the ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£22.60</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-black-maria_991/index.html\"><img src=\"media/cache/58/46/5846057e28022268153beff6d352b06c.jpg\" alt=\"The Black Maria\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                
</p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-black-maria_991/index.html\" title=\"The Black Maria\">The Black Maria</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£52.15</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/starving-hearts-triangular-trade-trilogy-1_990/index.html\"><img src=\"media/cache/be/f4/bef44da28c98f905a3ebec0b87be8530.jpg\" alt=\"Starving Hearts (Triangular Trade Trilogy, #1)\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Two\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/starving-hearts-triangular-trade-trilogy-1_990/index.html\" title=\"Starving Hearts (Triangular Trade Trilogy, #1)\">Starving Hearts (Triangular Trade ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£13.99</p>\n    
\n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/shakespeares-sonnets_989/index.html\"><img src=\"media/cache/10/48/1048f63d3b5061cd2f424d20b3f9b666.jpg\" alt=\"Shakespeare's Sonnets\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/shakespeares-sonnets_989/index.html\" title=\"Shakespeare's Sonnets\">Shakespeare's Sonnets</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£20.66</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n             
           <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/set-me-free_988/index.html\"><img src=\"media/cache/5b/88/5b88c52633f53cacf162c15f4f823153.jpg\" alt=\"Set Me Free\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/set-me-free_988/index.html\" title=\"Set Me Free\">Set Me Free</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£17.46</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/scott-pilgrims-precious-little-life-scott-pilgrim-1_987/index.html\"><img src=\"media/cache/94/b1/94b1b8b244bce9677c2f29ccc890d4d2.jpg\" alt=\"Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)\" class=\"thumbnail\"></a>\n  
                  \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/scott-pilgrims-precious-little-life-scott-pilgrim-1_987/index.html\" title=\"Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)\">Scott Pilgrim's Precious Little ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£52.29</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/rip-it-up-and-start-again_986/index.html\"><img src=\"media/cache/81/c4/81c4a973364e17d01f217e1188253d5e.jpg\" alt=\"Rip it Up and Start Again\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n  
                  <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/rip-it-up-and-start-again_986/index.html\" title=\"Rip it Up and Start Again\">Rip it Up and ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£35.02</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/our-band-could-be-your-life-scenes-from-the-american-indie-underground-1981-1991_985/index.html\"><img src=\"media/cache/54/60/54607fe8945897cdcced0044103b10b6.jpg\" alt=\"Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Three\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/our-band-could-be-your-life-scenes-from-the-american-indie-underground-1981-1991_985/index.html\" title=\"Our Band Could Be Your Life: Scenes from the American 
Indie Underground, 1981-1991\">Our Band Could Be ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£57.25</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/olio_984/index.html\"><img src=\"media/cache/55/33/553310a7162dfbc2c6d19a84da0df9e1.jpg\" alt=\"Olio\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/olio_984/index.html\" title=\"Olio\">Olio</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£23.88</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to 
basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/mesaerion-the-best-science-fiction-stories-1800-1849_983/index.html\"><img src=\"media/cache/09/a3/09a3aef48557576e1a85ba7efea8ecb7.jpg\" alt=\"Mesaerion: The Best Science Fiction Stories 1800-1849\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/mesaerion-the-best-science-fiction-stories-1800-1849_983/index.html\" title=\"Mesaerion: The Best Science Fiction Stories 1800-1849\">Mesaerion: The Best Science ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£37.59</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div 
class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/libertarianism-for-beginners_982/index.html\"><img src=\"media/cache/0b/bc/0bbcd0a6f4bcd81ccb1049a52736406e.jpg\" alt=\"Libertarianism for Beginners\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Two\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/libertarianism-for-beginners_982/index.html\" title=\"Libertarianism for Beginners\">Libertarianism for Beginners</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£51.33</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/its-only-the-himalayas_981/index.html\"><img src=\"media/cache/27/a5/27a53d0bb95bdd88288eaf66c9230d7e.jpg\" alt=\"It's Only the Himalayas\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p 
class=\"star-rating Two\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/its-only-the-himalayas_981/index.html\" title=\"It's Only the Himalayas\">It's Only the Himalayas</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£45.17</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                </ol>\n                \n\n\n\n    <div>\n        <ul class=\"pager\">\n            \n            <li class=\"current\">\n            \n                Page 1 of 50\n            \n            </li>\n            \n                <li class=\"next\"><a href=\"catalogue/page-2.html\">next</a></li>\n            \n        </ul>\n    </div>\n\n\n            </div>\n        </section>\n    \n\n\n            </div>\n\n        </div><!-- /row -->\n    </div><!-- /page_inner -->\n</div><!-- /container-fluid -->\n\n\n    \n<footer class=\"footer container-fluid\">\n    \n        \n    \n</footer>\n\n\n        \n        \n  \n            <!-- jQuery -->\n            <script src=\"http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js\"></script>\n            <script>window.jQuery || document.write('<script src=\"static/oscar/js/jquery/jquery-1.9.1.min.js\"><\\/script>')</script><script 
src=\"static/oscar/js/jquery/jquery-1.9.1.min.js\"></script>\n        \n  \n\n\n        \n        \n    \n        \n    <!-- Twitter Bootstrap -->\n    <script type=\"text/javascript\" src=\"static/oscar/js/bootstrap3/bootstrap.min.js\"></script>\n    <!-- Oscar -->\n    <script src=\"static/oscar/js/oscar/ui.js\" type=\"text/javascript\" charset=\"utf-8\"></script>\n\n    <script src=\"static/oscar/js/bootstrap-datetimepicker/bootstrap-datetimepicker.js\" type=\"text/javascript\" charset=\"utf-8\"></script>\n    <script src=\"static/oscar/js/bootstrap-datetimepicker/locales/bootstrap-datetimepicker.all.js\" type=\"text/javascript\" charset=\"utf-8\"></script>\n\n\n        \n        \n    \n\n    \n\n\n        \n        <script type=\"text/javascript\">\n            $(function() {\n                \n    \n    \n    oscar.init();\n\n    oscar.search.init();\n\n            });\n        </script>\n\n        \n        <!-- Version: N/A -->\n        \n    \n\n</body></html>", "echoData": 2}
{"url": "https://toscrape.com/", "statusCode": 200, "browserHtml": "<!DOCTYPE html><html lang=\"en\"><head>\n        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n        <title>Scraping Sandbox</title>\n        <link href=\"./css/bootstrap.min.css\" rel=\"stylesheet\">\n        <link href=\"./css/main.css\" rel=\"stylesheet\">\n    </head>\n    <body>\n        <div class=\"container\">\n            <div class=\"row\">\n                <div class=\"col-md-1\"></div>\n                <div class=\"col-md-10 well\">\n                    <img class=\"logo\" src=\"img/zyte.png\" width=\"200px\">\n                    <h1 class=\"text-right\">Web Scraping Sandbox</h1>\n                </div>\n            </div>\n\n            <div class=\"row\">\n                <div class=\"col-md-1\"></div>\n                <div class=\"col-md-10\">\n                    <h2>Books</h2>\n                    <p>A <a href=\"http://books.toscrape.com\">fictional bookstore</a> that desperately wants to be scraped. It's a safe place for beginners learning web scraping and for developers validating their scraping technologies as well. 
Available at: <a href=\"http://books.toscrape.com\">books.toscrape.com</a></p>\n                    <div class=\"col-md-6\">\n                        <a href=\"http://books.toscrape.com\"><img src=\"./img/books.png\" class=\"img-thumbnail\"></a>\n                    </div>\n                    <div class=\"col-md-6\">\n                        <table class=\"table table-hover\">\n                            <tbody><tr><th colspan=\"2\">Details</th></tr>\n                            <tr><td>Amount of items </td><td>1000</td></tr>\n                            <tr><td>Pagination </td><td>✔</td></tr>\n                            <tr><td>Items per page </td><td>max 20</td></tr>\n                            <tr><td>Requires JavaScript </td><td>✘</td></tr>\n                        </tbody></table>\n                    </div>\n                </div>\n            </div>\n\n            <div class=\"row\">\n                <div class=\"col-md-1\"></div>\n                <div class=\"col-md-10\">\n                    <h2>Quotes</h2>\n                    <p><a href=\"http://quotes.toscrape.com/\">A website</a> that lists quotes from famous people. 
It has many endpoints showing the quotes in many different ways, each of them including new scraping challenges for you, as described below.</p>\n                    <div class=\"col-md-6\">\n                        <a href=\"http://quotes.toscrape.com\"><img src=\"./img/quotes.png\" class=\"img-thumbnail\"></a>\n                    </div>\n                    <div class=\"col-md-6\">\n                        <table class=\"table table-hover\">\n                            <tbody><tr><th colspan=\"2\">Endpoints</th></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/\">Default</a></td><td>Microdata and pagination</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/scroll\">Scroll</a> </td><td>infinite scrolling pagination</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/js\">JavaScript</a> </td><td>JavaScript generated content</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/js-delayed\">Delayed</a> </td><td>Same as JavaScript but with a delay (?delay=10000)</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/tableful\">Tableful</a> </td><td>a table based messed-up layout</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/login\">Login</a> </td><td>login with CSRF token (any user/passwd works)</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/search.aspx\">ViewState</a> </td><td>an AJAX based filter form with ViewStates</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/random\">Random</a> </td><td>a single random quote</td></tr>\n                        </tbody></table>\n                    </div>\n                </div>\n            </div>\n        </div>\n    \n\n</body></html>", "echoData": 1}