Zyte API shared features

Learn here about Zyte API features that you can use with HTTP requests, browser requests, and automatic extraction: geolocation, cookies, session contexts, redirection, response headers, and metadata.

Geolocation

The geographical point of origin of a request can influence the response content. Some websites adjust the language or currency based on the country of origin. Some websites only allow traffic from specific countries.

Zyte API uses the recommended location for any given website by default, but you can use the geolocation request field to force a specific country of origin for a request.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Sends an HTTP request through Zyte API with Australia as the forced country
// of origin, and reads the country code that ip-api.com reports back.

// Let the handler transparently decompress the API response.
var handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
// Disposing the client also disposes the handler it was built with.
using var client = new HttpClient(handler);

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "http://ip-api.com/json"},
    {"httpResponseBody", true},
    {"geolocation", "AU"}
};
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
// Surface HTTP-level failures as such, rather than as a missing
// "httpResponseBody" property further down.
response.EnsureSuccessStatusCode();
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so it must be disposed.
using var data = JsonDocument.Parse(body);
// The target website's response body is Base64-encoded inside the API response.
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);

using var responseData = JsonDocument.Parse(httpResponseBody);
var countryCode = responseData.RootElement.GetProperty("countryCode").ToString();
input.jsonl#
{"url": "http://ip-api.com/json", "httpResponseBody": true, "geolocation": "AU"}
# Run the request from input.jsonl, decode the Base64 body, print the country code.
zyte-api input.jsonl |
    jq --raw-output '.httpResponseBody' |
    base64 --decode |
    jq '.countryCode'
input.json#
{
    "url": "http://ip-api.com/json",
    "httpResponseBody": true,
    "geolocation": "AU"
}
# POST input.json to Zyte API, decode the Base64 body, print the country code.
curl \
    --user 'YOUR_API_KEY:' \
    --header 'Content-Type: application/json' \
    --data '@input.json' \
    --compressed \
    'https://api.zyte.com/v1/extract' |
    jq --raw-output '.httpResponseBody' |
    base64 --decode |
    jq '.countryCode'
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends an HTTP request through Zyte API with Australia as the forced country of origin and
 * reads the country code that ip-api.com reports back.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url", "http://ip-api.com/json", "httpResponseBody", true, "geolocation", "AU");
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone would encode the body as
    // ISO-8859-1 text, contradicting the UTF-8 JSON Content-Type header set above.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The target website's response body is Base64-encoded inside the API response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        JsonObject data = JsonParser.parseString(httpResponseBody).getAsJsonObject();
        String countryCode = data.get("countryCode").getAsString();
      }
    }
  }

  /** Builds the HTTP Basic Authorization header: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Request body: fetch ip-api.com with Australia as the country of origin.
const payload = {
  url: 'http://ip-api.com/json',
  httpResponseBody: true,
  geolocation: 'AU'
}
// The API key is the Basic auth user name; no password is sent.
const config = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', payload, config)
  .then(({ data: apiResponse }) => {
    // The target website's response body comes Base64-encoded.
    const httpResponseBody = Buffer.from(apiResponse.httpResponseBody, 'base64')
    const { countryCode } = JSON.parse(httpResponseBody)
  })
<?php

// Ask Zyte API to fetch ip-api.com with Australia as the country of origin.
$options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => [
        'url' => 'http://ip-api.com/json',
        'httpResponseBody' => true,
        'geolocation' => 'AU',
    ],
];
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', $options);

// The target website's response body comes Base64-encoded in the API response.
$api = json_decode($response->getBody());
$data = json_decode(base64_decode($api->httpResponseBody));
$country_code = $data->countryCode;

With the proxy mode, use the Zyte-Geolocation header.

# Proxy mode: the Zyte-Geolocation header forces the country of origin.
# AU matches the other examples in this section and the output shown below.
curl \
    --proxy api.zyte.com:8011 \
    --proxy-user YOUR_API_KEY: \
    --compressed \
    -H "Zyte-Geolocation: AU" \
    http://ip-api.com/json \
    | jq .countryCode
import json
from base64 import b64decode

import requests

# Ask Zyte API to fetch ip-api.com with Australia as the country of origin.
request_payload = {
    "url": "http://ip-api.com/json",
    "httpResponseBody": True,
    "geolocation": "AU",
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_payload,
)
# The target website's response body comes Base64-encoded in the API response.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body: bytes = b64decode(encoded_body)
response_data = json.loads(http_response_body)
country_code = response_data["countryCode"]
import asyncio
import json
from base64 import b64decode

from zyte_api.aio.client import AsyncClient


async def main():
    """Fetch ip-api.com through Zyte API from Australia and read the country code."""
    query = {
        "url": "http://ip-api.com/json",
        "httpResponseBody": True,
        "geolocation": "AU",
    }
    api_response = await AsyncClient().request_raw(query)
    # The target website's response body comes Base64-encoded.
    encoded_body = api_response["httpResponseBody"]
    http_response_body: bytes = b64decode(encoded_body)
    country_code = json.loads(http_response_body)["countryCode"]


asyncio.run(main())
import json

from scrapy import Request, Spider


class IPAPIComSpider(Spider):
    """Spider that requests ip-api.com with an Australian geolocation."""

    name = "ip_api_com"

    def start_requests(self):
        """Yield the single request, carrying the geolocation in its Zyte API meta key."""
        zyte_api_params = {"geolocation": "AU"}
        yield Request(
            "http://ip-api.com/json",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        """Read the country code from the JSON response body."""
        country_code = json.loads(response.body)["countryCode"]

Output:

"AU"

Cookies

Some websites use cookies to track sessions and user preferences like language, address, etc.

Use the requestCookies and responseCookies request fields to set and get cookies.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

The following code example sends a cookie to httpbin.org and prints the cookies that httpbin.org reports having received:

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Sends a cookie to httpbin.org through Zyte API and prints the cookies that
// httpbin.org reports back.

// Let the handler transparently decompress the API response.
var handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
// Disposing the client also disposes the handler it was built with.
using var client = new HttpClient(handler);

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "https://httpbin.org/cookies"},
    {"httpResponseBody", true},
    {
        "requestCookies",
        new List<Dictionary<string, string>>()
        {
            new Dictionary<string, string>()
            {
                {"name", "foo"},
                {"value", "bar"},
                {"domain", "httpbin.org"}
            }
        }
    }
};
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
// Surface HTTP-level failures as such, rather than as a missing
// "httpResponseBody" property further down.
response.EnsureSuccessStatusCode();
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so it must be disposed.
using var data = JsonDocument.Parse(body);
// The target website's response body is Base64-encoded inside the API response.
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBody = System.Convert.FromBase64String(base64HttpResponseBody);
var result = System.Text.Encoding.UTF8.GetString(httpResponseBody);

Console.WriteLine(result);
input.jsonl#
{"url": "https://httpbin.org/cookies", "httpResponseBody": true, "requestCookies": [{"name": "foo", "value": "bar", "domain": "httpbin.org"}]}
# Run the request from input.jsonl and print the decoded response body.
zyte-api input.jsonl |
    jq --raw-output '.httpResponseBody' |
    base64 --decode
input.json#
{
    "url": "https://httpbin.org/cookies",
    "httpResponseBody": true,
    "requestCookies": [
        {
            "name": "foo",
            "value": "bar",
            "domain": "httpbin.org"
        }
    ]
}
# POST input.json to Zyte API and print the decoded response body.
curl \
    --user 'YOUR_API_KEY:' \
    --header 'Content-Type: application/json' \
    --data '@input.json' \
    --compressed \
    'https://api.zyte.com/v1/extract' |
    jq --raw-output '.httpResponseBody' |
    base64 --decode
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/**
 * Sends a cookie to httpbin.org through Zyte API and prints the cookies that httpbin.org
 * reports back.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    Map<String, String> cookies =
        ImmutableMap.of("name", "foo", "value", "bar", "domain", "httpbin.org");
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://httpbin.org/cookies",
            "httpResponseBody",
            true,
            "requestCookies",
            Collections.singletonList(cookies));
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone would encode the body as
    // ISO-8859-1 text, contradicting the UTF-8 JSON Content-Type header set above.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The target website's response body is Base64-encoded inside the API response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        System.out.println(httpResponseBody);
      }
    }
  }

  /** Builds the HTTP Basic Authorization header: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Cookie to send to httpbin.org along with the request.
const fooCookie = { name: 'foo', value: 'bar', domain: 'httpbin.org' }

axios
  .post(
    'https://api.zyte.com/v1/extract',
    {
      url: 'https://httpbin.org/cookies',
      httpResponseBody: true,
      requestCookies: [fooCookie]
    },
    { auth: { username: 'YOUR_API_KEY' } }
  )
  .then(({ data }) => {
    // The target website's response body comes Base64-encoded.
    const decoded = Buffer.from(data.httpResponseBody, 'base64')
    console.log(decoded.toString())
  })
<?php

// Cookie to send to httpbin.org along with the request.
$cookie = [
    'name' => 'foo',
    'value' => 'bar',
    'domain' => 'httpbin.org',
];
$options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => [
        'url' => 'https://httpbin.org/cookies',
        'httpResponseBody' => true,
        'requestCookies' => [$cookie],
    ],
];
$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', $options);

// The target website's response body comes Base64-encoded in the API response.
$api = json_decode($response->getBody());
echo base64_decode($api->httpResponseBody);

With the proxy mode, the request Cookie header from your requests is used automatically to set cookies for the target URL domain.

# Proxy mode: the Cookie header of the request sets cookies for the target domain.
curl \
    --proxy 'api.zyte.com:8011' \
    --proxy-user 'YOUR_API_KEY:' \
    --compressed \
    --header 'Cookie: foo=bar' \
    'https://httpbin.org/cookies'

Setting cookies for additional domains is not supported.

from base64 import b64decode

import requests

# Cookie to send to httpbin.org along with the request.
foo_cookie = {"name": "foo", "value": "bar", "domain": "httpbin.org"}
request_payload = {
    "url": "https://httpbin.org/cookies",
    "httpResponseBody": True,
    "requestCookies": [foo_cookie],
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_payload,
)
# The target website's response body comes Base64-encoded in the API response.
encoded_body = api_response.json()["httpResponseBody"]
http_response_body = b64decode(encoded_body)
print(http_response_body.decode())
import asyncio
from base64 import b64decode

from zyte_api.aio.client import AsyncClient


async def main():
    """Send a cookie to httpbin.org through Zyte API and print the reported cookies."""
    foo_cookie = {"name": "foo", "value": "bar", "domain": "httpbin.org"}
    query = {
        "url": "https://httpbin.org/cookies",
        "httpResponseBody": True,
        "requestCookies": [foo_cookie],
    }
    api_response = await AsyncClient().request_raw(query)
    # The target website's response body comes Base64-encoded.
    http_response_body = b64decode(api_response["httpResponseBody"])
    print(http_response_body.decode())


asyncio.run(main())
from scrapy import Request, Spider


class QuotesToScrapeComSpider(Spider):
    """Spider that sends a cookie to httpbin.org and prints the response text."""

    # NOTE(review): the class and spider names mention quotes.toscrape.com, but the
    # request targets httpbin.org; they look copied from another example. Confirm
    # before renaming, since the spider name is what `scrapy crawl` takes.
    name = "quotes_toscrape_com"

    def start_requests(self):
        """Yield the single request, carrying the cookie in its Zyte API meta key."""
        foo_cookie = {
            "name": "foo",
            "value": "bar",
            "domain": "httpbin.org",
        }
        yield Request(
            "https://httpbin.org/cookies",
            meta={"zyte_api_automap": {"requestCookies": [foo_cookie]}},
        )

    def parse(self, response):
        """Print the response body as text."""
        print(response.text)

Output:

{
  "cookies": {
    "foo": "bar"
  }
}

A common usage pattern with cookies is to send a browser request with the responseCookies request field set to true to a webpage that requires a browser to generate a valid session cookie, and then copy the responseCookies response field value into the requestCookies request field of follow-up HTTP requests. This allows using sessions on websites as long as the target website only checks for the cookie presence, which is often the case.

Tip

Session contexts can be a better alternative for some use cases.

If you do not set request cookies, Zyte API may set some request cookies anyway to minimize bans. If you do not want that, set the cookieManagement request field to "discard"; requestCookies will still be used if defined.

Session contexts

In web scraping, a session is a set of request conditions (IP address, network stack, cookies, etc.) that, when shared by two or more requests, make those requests seem like part of an organic web browsing session.

For some websites, Zyte API may use server-managed sessions to improve ban avoidance. Session contexts let you request such a server-managed session and define prerequisites for it.

Session contexts can be very helpful in some scenarios. For example:

  • If you have multiple browser requests that all share a set of initial actions for basic session setup, such as using the SetLocation action or similar, using session contexts can get you faster responses and give you extra run time for actions.

  • If you have multiple HTTP requests that need cookies from an earlier browser request, and you need those follow-up requests to be sent with the same session as the browser request, session contexts can give you that.

    Note

    Pricing-wise, requests that do not reuse a previous session count as browser requests, including action costs.

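To assign a session context to a request, set the sessionContext request field. Use the sessionContextParameters request field to define how the session is set up (for example, with actions), as shown in the example below.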

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

Warning

This code example features logging into a website. While this is fine for the quotes.toscrape.com domain, please seek legal counsel before you start logging into other websites. The terms of use that you accept when creating an account on a website may forbid web scraping or automation in general.

using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;
using System.Xml.XPath;
using HtmlAgilityPack;

// Let the handler transparently decompress the API response.
var handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
// Dispose the client when the program ends; this also disposes its handler.
using var client = new HttpClient(handler);

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Builds a CSS selector object ({"type": "css", "value": ...}) for an action.
static Dictionary<string, string> CssSelector(string value) =>
    new Dictionary<string, string>() { ["type"] = "css", ["value"] = value };

// Browser actions that set the session up: open the login form, fill it in,
// submit it, and wait for the resulting navigation.
var loginActions = new List<Dictionary<string, object>>()
{
    new Dictionary<string, object>()
    {
        ["action"] = "click",
        ["selector"] = CssSelector("[href='/login']")
    },
    new Dictionary<string, object>()
    {
        ["action"] = "waitForSelector",
        ["selector"] = CssSelector("#username")
    },
    new Dictionary<string, object>()
    {
        ["action"] = "type",
        ["selector"] = CssSelector("#username"),
        ["text"] = "user"
    },
    new Dictionary<string, object>()
    {
        ["action"] = "type",
        ["selector"] = CssSelector("#password"),
        ["text"] = "password"
    },
    new Dictionary<string, object>()
    {
        ["action"] = "click",
        ["selector"] = CssSelector("[type='submit']")
    },
    new Dictionary<string, object>()
    {
        ["action"] = "waitForNavigation",
        ["timeout"] = 31
    }
};

// Request parameters: the session context and the actions that initialize it.
var input = new Dictionary<string, object>()
{
    ["url"] = "https://quotes.toscrape.com",
    ["httpResponseBody"] = true,
    ["sessionContext"] = new List<Dictionary<string, string>>()
    {
        new Dictionary<string, string>() { ["name"] = "id", ["value"] = "login" }
    },
    ["sessionContextParameters"] = new Dictionary<string, object>()
    {
        ["actions"] = loginActions
    }
};
// Send the request, decode the returned page, and print the text of its logout link.
var inputJson = JsonSerializer.Serialize(input);
using var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

using HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
// Surface HTTP-level failures as such, rather than as a missing
// "httpResponseBody" property further down.
response.EnsureSuccessStatusCode();
var body = await response.Content.ReadAsByteArrayAsync();

// JsonDocument rents pooled memory, so it must be disposed.
using var data = JsonDocument.Parse(body);
// The target website's response body is Base64-encoded inside the API response.
var base64HttpResponseBody = data.RootElement.GetProperty("httpResponseBody").ToString();
var httpResponseBodyBytes = System.Convert.FromBase64String(base64HttpResponseBody);
var httpResponseBody = System.Text.Encoding.UTF8.GetString(httpResponseBodyBytes);
var htmlDocument = new HtmlDocument();
htmlDocument.LoadHtml(httpResponseBody);
var navigator = htmlDocument.CreateNavigator();
var nodeIterator = (XPathNodeIterator)navigator.Evaluate("//a[@href='/logout']/text()");
// No match means the page has no logout link; report that explicitly
// instead of reading an iterator that was never positioned on a node.
if (!nodeIterator.MoveNext())
{
    throw new InvalidOperationException("Logout link not found in the response.");
}
var logout = nodeIterator.Current.ToString();

Console.WriteLine(logout);
input.jsonl#
{"url": "https://quotes.toscrape.com", "httpResponseBody": true, "sessionContext": [{"name": "id", "value": "login"}], "sessionContextParameters": {"actions": [{"action": "click", "selector": {"type": "css", "value": "[href='/login']"}}, {"action": "waitForSelector", "selector": {"type": "css", "value": "#username"}}, {"action": "type", "selector": {"type": "css", "value": "#username"}, "text": "user"}, {"action": "type", "selector": {"type": "css", "value": "#password"}, "text": "password"}, {"action": "click", "selector": {"type": "css", "value": "[type='submit']"}}, {"action": "waitForNavigation", "timeout": 31}]}}
# Run the request from input.jsonl, decode the page, print the logout link text.
# Parser diagnostics from xmllint are discarded.
zyte-api input.jsonl |
    jq --raw-output '.httpResponseBody' |
    base64 --decode |
    xmllint --html --xpath '//a[@href="/logout"]/text()' - 2> /dev/null
input.json#
{
    "url": "https://quotes.toscrape.com",
    "httpResponseBody": true,
    "sessionContext": [
        {
            "name": "id",
            "value": "login"
        }
    ],
    "sessionContextParameters": {
        "actions": [
            {
                "action": "click",
                "selector": {
                    "type": "css",
                    "value": "[href='/login']"
                }
            },
            {
                "action": "waitForSelector",
                "selector": {
                    "type": "css",
                    "value": "#username"
                }
            },
            {
                "action": "type",
                "selector": {
                    "type": "css",
                    "value": "#username"
                },
                "text": "user"
            },
            {
                "action": "type",
                "selector": {
                    "type": "css",
                    "value": "#password"
                },
                "text": "password"
            },
            {
                "action": "click",
                "selector": {
                    "type": "css",
                    "value": "[type='submit']"
                }
            },
            {
                "action": "waitForNavigation",
                "timeout": 31
            }
        ]
    }
}
# POST input.json to Zyte API, decode the page, print the logout link text.
# Parser diagnostics from xmllint are discarded.
curl \
    --user 'YOUR_API_KEY:' \
    --header 'Content-Type: application/json' \
    --data '@input.json' \
    --compressed \
    'https://api.zyte.com/v1/extract' |
    jq --raw-output '.httpResponseBody' |
    base64 --decode |
    xmllint --html --xpath '//a[@href="/logout"]/text()' - 2> /dev/null
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Requests quotes.toscrape.com through Zyte API with a session context whose setup actions log
 * in, and prints the text of the logout link found in the response.
 */
class Example {

  private static final String API_KEY = "YOUR_API_KEY";

  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    // The actions open the login form, fill it in, submit it, and wait for the navigation.
    Map<String, Object> parameters =
        ImmutableMap.of(
            "url",
            "https://quotes.toscrape.com",
            "httpResponseBody",
            true,
            "sessionContext",
            ImmutableList.of(ImmutableMap.of("name", "id", "value", "login")),
            "sessionContextParameters",
            ImmutableMap.of(
                "actions",
                ImmutableList.of(
                    ImmutableMap.of(
                        "action",
                        "click",
                        "selector",
                        ImmutableMap.of("type", "css", "value", "[href='/login']")),
                    ImmutableMap.of(
                        "action",
                        "waitForSelector",
                        "selector",
                        ImmutableMap.of("type", "css", "value", "#username")),
                    ImmutableMap.of(
                        "action", "type",
                        "selector", ImmutableMap.of("type", "css", "value", "#username"),
                        "text", "user"),
                    ImmutableMap.of(
                        "action", "type",
                        "selector", ImmutableMap.of("type", "css", "value", "#password"),
                        "text", "password"),
                    ImmutableMap.of(
                        "action",
                        "click",
                        "selector",
                        ImmutableMap.of("type", "css", "value", "[type='submit']")),
                    ImmutableMap.of("action", "waitForNavigation", "timeout", 31))));

    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    // Pass the content type explicitly: StringEntity(String) alone would encode the body as
    // ISO-8859-1 text, contradicting the UTF-8 JSON Content-Type header set above.
    request.setEntity(new StringEntity(requestBody, ContentType.APPLICATION_JSON));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // The target website's response body is Base64-encoded inside the API response.
        String base64HttpResponseBody = jsonObject.get("httpResponseBody").getAsString();
        byte[] httpResponseBodyBytes = Base64.getDecoder().decode(base64HttpResponseBody);
        String httpResponseBody = new String(httpResponseBodyBytes, StandardCharsets.UTF_8);
        Document document = Jsoup.parse(httpResponseBody);
        String logout = document.select("[href='/logout']").text();
        System.out.println(logout);
      }
    }
  }

  /** Builds the HTTP Basic Authorization header: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    // Use an explicit charset so the result does not depend on the platform default.
    String encodedAuth =
        Base64.getEncoder().encodeToString(auth.getBytes(StandardCharsets.UTF_8));
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')
const cheerio = require('cheerio')

// Builds a CSS selector object for an action.
const css = (value) => ({ type: 'css', value })

// Browser actions that set the session up: open the login form, fill it in,
// submit it, and wait for the resulting navigation.
const loginActions = [
  { action: 'click', selector: css("[href='/login']") },
  { action: 'waitForSelector', selector: css('#username') },
  { action: 'type', selector: css('#username'), text: 'user' },
  { action: 'type', selector: css('#password'), text: 'password' },
  { action: 'click', selector: css("[type='submit']") },
  { action: 'waitForNavigation', timeout: 31 }
]

const payload = {
  url: 'https://quotes.toscrape.com',
  httpResponseBody: true,
  sessionContext: [{ name: 'id', value: 'login' }],
  sessionContextParameters: { actions: loginActions }
}

axios
  .post('https://api.zyte.com/v1/extract', payload, {
    auth: { username: 'YOUR_API_KEY' }
  })
  .then(({ data }) => {
    // The target website's response body comes Base64-encoded.
    const httpResponseBody = Buffer.from(data.httpResponseBody, 'base64')
    const $ = cheerio.load(httpResponseBody)
    console.log($("[href='/logout']").text())
  })
<?php

// Requests quotes.toscrape.com through Zyte API with a session context whose
// setup actions log in, then prints the text of the logout link in the response.

$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => [
        'url' => 'https://quotes.toscrape.com',
        'httpResponseBody' => true,
        'sessionContext' => [
            [
                'name' => 'id',
                'value' => 'login',
            ],
        ],
        // Actions: open the login form, fill it in, submit it, wait for the navigation.
        'sessionContextParameters' => [
            'actions' => [
                [
                    'action' => 'click',
                    'selector' => [
                        'type' => 'css',
                        'value' => "[href='/login']",
                    ],
                ],
                [
                    'action' => 'waitForSelector',
                    'selector' => [
                        'type' => 'css',
                        'value' => '#username',
                    ],
                ],
                [
                    'action' => 'type',
                    'selector' => [
                        'type' => 'css',
                        'value' => '#username',
                    ],
                    'text' => 'user',
                ],
                [
                    'action' => 'type',
                    'selector' => [
                        'type' => 'css',
                        'value' => '#password',
                    ],
                    'text' => 'password',
                ],
                [
                    'action' => 'click',
                    'selector' => [
                        'type' => 'css',
                        'value' => "[type='submit']",
                    ],
                ],
                [
                    'action' => 'waitForNavigation',
                    'timeout' => 31,
                ],
            ],
        ],
    ],
]);
$data = json_decode($response->getBody());
// The target website's response body comes Base64-encoded in the API response.
$http_response_body = base64_decode($data->httpResponseBody);

// libxml reports warnings for markup it does not recognize; collect them
// internally so they do not get printed alongside the result.
$previous_setting = libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($http_response_body);
libxml_clear_errors();
libxml_use_internal_errors($previous_setting);

$xpath = new DOMXPath($doc);
$nodes = $xpath->query("//a[@href='/logout']/text()");
// Without a match, item(0) would be null and reading nodeValue would fail.
if ($nodes === false || $nodes->length === 0) {
    fwrite(STDERR, "Logout link not found in the response.\n");
    exit(1);
}
$logout = $nodes->item(0)->nodeValue;
echo $logout."\n";
from base64 import b64decode

import requests
from parsel import Selector

def _css(value):
    """Return an action selector that matches the CSS expression ``value``."""
    return {"type": "css", "value": value}


# Browser actions sent as sessionContextParameters: open the login form, fill
# in the credentials, submit, and wait for the navigation that follows.
login_actions = [
    {"action": "click", "selector": _css("[href='/login']")},
    {"action": "waitForSelector", "selector": _css("#username")},
    {"action": "type", "selector": _css("#username"), "text": "user"},
    {"action": "type", "selector": _css("#password"), "text": "password"},
    {"action": "click", "selector": _css("[type='submit']")},
    {"action": "waitForNavigation", "timeout": 31},
]
request_payload = {
    "url": "https://quotes.toscrape.com",
    "httpResponseBody": True,
    "sessionContext": [{"name": "id", "value": "login"}],
    "sessionContextParameters": {"actions": login_actions},
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_payload,
)
# httpResponseBody is Base64-encoded; decode it to text before parsing.
encoded_body = api_response.json()["httpResponseBody"]
html = b64decode(encoded_body).decode()
logout = Selector(html).css("[href='/logout'] ::text").get()
print(logout)
import asyncio
from base64 import b64decode

from parsel import Selector
from zyte_api.aio.client import AsyncClient


def _css(value):
    """Return an action selector that matches the CSS expression ``value``."""
    return {"type": "css", "value": value}


async def main():
    """Fetch the home page through a logged-in session context and print the logout link text."""
    # Actions used to initialize the session: open the login form, fill in
    # the credentials, submit, and wait for the navigation that follows.
    login_actions = [
        {"action": "click", "selector": _css("[href='/login']")},
        {"action": "waitForSelector", "selector": _css("#username")},
        {"action": "type", "selector": _css("#username"), "text": "user"},
        {"action": "type", "selector": _css("#password"), "text": "password"},
        {"action": "click", "selector": _css("[type='submit']")},
        {"action": "waitForNavigation", "timeout": 31},
    ]
    query = {
        "url": "https://quotes.toscrape.com",
        "httpResponseBody": True,
        "sessionContext": [{"name": "id", "value": "login"}],
        "sessionContextParameters": {"actions": login_actions},
    }
    client = AsyncClient()
    api_response = await client.request_raw(query)
    # httpResponseBody is Base64-encoded; decode it to text before parsing.
    html = b64decode(api_response["httpResponseBody"]).decode()
    logout = Selector(html).css("[href='/logout'] ::text").get()
    print(logout)


asyncio.run(main())
from scrapy import Request, Spider


def _css(value):
    """Return an action selector that matches the CSS expression ``value``."""
    return {"type": "css", "value": value}


class QuotesToScrapeComSpider(Spider):
    """Requests quotes.toscrape.com with a session context that logs in first."""

    name = "quotes_toscrape_com"

    def start_requests(self):
        """Yield the home page request with the login session context attached."""
        # Actions used to initialize the session: open the login form, fill
        # in the credentials, submit, and wait for the navigation that follows.
        login_actions = [
            {"action": "click", "selector": _css("[href='/login']")},
            {"action": "waitForSelector", "selector": _css("#username")},
            {"action": "type", "selector": _css("#username"), "text": "user"},
            {"action": "type", "selector": _css("#password"), "text": "password"},
            {"action": "click", "selector": _css("[type='submit']")},
            {"action": "waitForNavigation", "timeout": 31},
        ]
        zyte_api_params = {
            "sessionContext": [{"name": "id", "value": "login"}],
            "sessionContextParameters": {"actions": login_actions},
        }
        yield Request(
            "https://quotes.toscrape.com",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        """Print the text of the logout link found in the response."""
        logout_text = response.css("[href='/logout'] ::text").get()
        print(logout_text)

Output:

Logout

Every request that you send with the same value in sessionContext will use a session that was initialized with sessionContextParameters. All those requests should also always include the sessionContextParameters request field with the same value.

Zyte API handles creation, reuse, and deletion of sessions requested through sessionContext, meaning:

  • When you send requests with the same sessionContext, they may use the same session, or use separate sessions that were both initialized with sessionContextParameters.

  • You cannot manually delete a session. However, you can change the value of sessionContext, even if sessionContextParameters remains the same, so that your requests will not reuse sessions created with the previous value.

Redirection#

Zyte API always follows HTTP redirection.

On browser or automatic extraction requests, a redirection triggered by HTML or JavaScript is also followed.

Response headers#

Set the httpResponseHeaders request field to true to get HTTP response headers in the httpResponseHeaders response field.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System.Collections.Generic;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// The handler transparently decompresses br, gzip and deflate response bodies.
HttpClientHandler handler = new HttpClientHandler()
{
    AutomaticDecompression = DecompressionMethods.All
};
HttpClient client = new HttpClient(handler);

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var bytes = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var auth = System.Convert.ToBase64String(bytes);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + auth);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

var input = new Dictionary<string, object>(){
    {"url", "https://toscrape.com"},
    {"httpResponseHeaders", true}
};
var inputJson = JsonSerializer.Serialize(input);
var content = new StringContent(inputJson, Encoding.UTF8, "application/json");

HttpResponseMessage response = await client.PostAsync("https://api.zyte.com/v1/extract", content);
var body = await response.Content.ReadAsByteArrayAsync();

var data = JsonDocument.Parse(body);

// httpResponseHeaders is an array of {name, value} objects. The same header
// name may appear more than once, and Dictionary.Add throws on a duplicate key,
// so repeated values are folded into one comma-separated entry instead.
// Header names are matched case-insensitively.
var headers = new Dictionary<string, string>(System.StringComparer.OrdinalIgnoreCase);
foreach (var header in data.RootElement.GetProperty("httpResponseHeaders").EnumerateArray())
{
    var name = header.GetProperty("name").ToString();
    var value = header.GetProperty("value").ToString();
    headers[name] = headers.TryGetValue(name, out var existing)
        ? existing + ", " + value
        : value;
}
input.jsonl#
{"url": "https://toscrape.com", "httpResponseHeaders": true}
zyte-api input.jsonl \
    | jq .httpResponseHeaders
input.json#
{
    "url": "https://toscrape.com",
    "httpResponseHeaders": true
}
curl \
    --user YOUR_API_KEY: \
    --header 'Content-Type: application/json' \
    --data @input.json \
    --compressed \
    https://api.zyte.com/v1/extract \
    | jq .httpResponseHeaders
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.entity.StringEntity;

/** Requests the HTTP response headers of a URL through Zyte API. */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Posts the request to the extract endpoint and reads the {@code httpResponseHeaders} array from
   * the JSON response.
   */
  public static void main(final String[] args)
      throws InterruptedException, IOException, ParseException {
    // Only httpResponseHeaders is requested, matching the other language examples in this
    // section.
    Map<String, Object> parameters =
        ImmutableMap.of("url", "https://toscrape.com", "httpResponseHeaders", true);
    String requestBody = new Gson().toJson(parameters);

    HttpPost request = new HttpPost("https://api.zyte.com/v1/extract");
    request.setHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON);
    request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
    request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
    request.setEntity(new StringEntity(requestBody));

    try (CloseableHttpClient client = HttpClients.createDefault()) {
      try (CloseableHttpResponse response = client.execute(request)) {
        HttpEntity entity = response.getEntity();
        String apiResponse = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JsonObject jsonObject = JsonParser.parseString(apiResponse).getAsJsonObject();
        // Each array element is an object with "name" and "value" members.
        JsonArray httpResponseHeaders = jsonObject.get("httpResponseHeaders").getAsJsonArray();
      }
    }
  }

  /** Builds the HTTP Basic Authorization header value: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const axios = require('axios')

// Ask Zyte API for the HTTP response headers of the target URL.
const requestBody = {
  url: 'https://toscrape.com',
  httpResponseHeaders: true
}
// The API key is sent as the Basic auth user name.
const requestConfig = { auth: { username: 'YOUR_API_KEY' } }

axios
  .post('https://api.zyte.com/v1/extract', requestBody, requestConfig)
  .then(({ data }) => {
    const httpResponseHeaders = data.httpResponseHeaders
  })
<?php

// Request only the HTTP response headers of the target URL.
$payload = [
    'url' => 'https://toscrape.com',
    'httpResponseHeaders' => true,
];
$request_options = [
    'auth' => ['YOUR_API_KEY', ''],
    'headers' => ['Accept-Encoding' => 'gzip'],
    'json' => $payload,
];

$client = new GuzzleHttp\Client();
$response = $client->request('POST', 'https://api.zyte.com/v1/extract', $request_options);

// The headers are in the httpResponseHeaders member of the decoded JSON response.
$api_response = json_decode($response->getBody());
$http_response_headers = $api_response->httpResponseHeaders;

In proxy mode, response headers are always included in the HTTP response, so there is no need to request them explicitly.

import requests

# Request only the HTTP response headers of the target URL.
request_payload = {
    "url": "https://toscrape.com",
    "httpResponseHeaders": True,
}
api_response = requests.post(
    "https://api.zyte.com/v1/extract",
    auth=("YOUR_API_KEY", ""),
    json=request_payload,
)
response_data = api_response.json()
http_response_headers = response_data["httpResponseHeaders"]
import asyncio

from zyte_api.aio.client import AsyncClient


async def main():
    """Request the HTTP response headers of toscrape.com through Zyte API."""
    query = {
        "url": "https://toscrape.com",
        "httpResponseHeaders": True,
    }
    client = AsyncClient()
    api_response = await client.request_raw(query)
    http_response_headers = api_response["httpResponseHeaders"]


asyncio.run(main())
from scrapy import Request, Spider


class ToScrapeComSpider(Spider):
    """Requests toscrape.com asking Zyte API for response headers only."""

    name = "toscrape_com"

    def start_requests(self):
        """Yield the single request, with the Zyte API parameters in its meta."""
        zyte_api_params = {
            "httpResponseBody": False,
            "httpResponseHeaders": True,
        }
        yield Request(
            "https://toscrape.com",
            meta={"zyte_api_automap": zyte_api_params},
        )

    def parse(self, response):
        """Read the response headers from the Scrapy response object."""
        headers = response.headers

Note

In transparent mode, httpResponseHeaders is sent by default for httpResponseBody requests, but sending it explicitly is still recommended, as future versions of scrapy-zyte-api may stop sending it by default.

Output (first 5 lines):

[
  {
    "name": "date",
    "value": "Fri, 25 Aug 2023 07:08:05 GMT"
  },

Note

Reading cookies from Set-Cookie response headers is not recommended: those headers only contain the cookies set by the final response, and do not account for cookies set during redirection or during browser rendering. Use responseCookies instead, as described in Cookies.

Metadata#

Set the echoData request field to an arbitrary value, to get that value verbatim in the echoData response field.

Example

Note

Install and configure code example requirements and the Zyte CA certificate to run the example below.

using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Threading.Tasks;

// Each entry pairs a URL to fetch with the value sent as echoData for that request.
var inputData = new List<List<object>>
{
    new() { "https://toscrape.com", 1 },
    new() { "https://books.toscrape.com", 2 },
    new() { "https://quotes.toscrape.com", 3 },
};
var output = new List<HttpResponseMessage>();

// One shared client; the handler caps connections to the API at 15.
var handler = new HttpClientHandler
{
    AutomaticDecompression = DecompressionMethods.All,
    MaxConnectionsPerServer = 15,
};
var client = new HttpClient(handler);

// HTTP Basic authentication: the API key is the user name, the password is empty.
var apiKey = "YOUR_API_KEY";
var credentials = Encoding.GetEncoding("ISO-8859-1").GetBytes(apiKey + ":");
var encodedCredentials = System.Convert.ToBase64String(credentials);
client.DefaultRequestHeaders.Add("Authorization", "Basic " + encodedCredentials);

client.DefaultRequestHeaders.Add("Accept-Encoding", "br, gzip, deflate");

// Start every request up front; ToList() forces the POSTs to be issued now.
var pending = inputData
    .Select(entry =>
    {
        var payload = new Dictionary<string, object>
        {
            ["url"] = entry[0],
            ["browserHtml"] = true,
            ["echoData"] = entry[1],
        };
        var payloadJson = JsonSerializer.Serialize(payload);
        var requestContent = new StringContent(payloadJson, Encoding.UTF8, "application/json");
        return client.PostAsync("https://api.zyte.com/v1/extract", requestContent);
    })
    .ToList();

// Collect responses in completion order rather than in input order.
while (pending.Count > 0)
{
    var finished = await Task.WhenAny(pending);
    pending.Remove(finished);
    output.Add(await finished);
}
input.jsonl#
{"url": "https://toscrape.com", "browserHtml": true, "echoData": 1}
{"url": "https://books.toscrape.com", "browserHtml": true, "echoData": 2}
{"url": "https://quotes.toscrape.com", "browserHtml": true, "echoData": 3}
zyte-api --n-conn 15 input.jsonl -o output.jsonl
input.jsonl#
{"url": "https://toscrape.com", "browserHtml": true, "echoData": 1}
{"url": "https://books.toscrape.com", "browserHtml": true, "echoData": 2}
{"url": "https://quotes.toscrape.com", "browserHtml": true, "echoData": 3}
# Send each line of input.jsonl as its own request, running up to 15 at a time.
# The bash -c script is double-quoted, so \$0 and \$1 are escaped to reach the
# inner shell and awk unexpanded. Replace YOUR_API_KEY with your own key, as in
# the other examples.
cat input.jsonl \
    | xargs -P 15 -d\\n -n 1 \
    bash -c "
        curl \
            --user YOUR_API_KEY: \
            --header 'Content-Type: application/json' \
            --data \"\$0\" \
            --compressed \
            https://api.zyte.com/v1/extract \
        | jq .echoData \
        | awk '{print \$1}' \
        >> output.jsonl
"
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.hc.client5.http.async.methods.SimpleHttpRequest;
import org.apache.hc.client5.http.async.methods.SimpleHttpResponse;
import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient;
import org.apache.hc.client5.http.impl.async.HttpAsyncClients;
import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManager;
import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder;
import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder;
import org.apache.hc.core5.concurrent.FutureCallback;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.nio.ssl.TlsStrategy;
import org.apache.hc.core5.reactor.ssl.TlsDetails;

/**
 * Sends one Zyte API browser request per input URL concurrently, tagging each request with an
 * echoData value, and collects the response bodies.
 */
class Example {
  private static final String API_KEY = "YOUR_API_KEY";

  /**
   * Starts all requests on an async client limited to {@code concurrency} connections, then waits
   * for every request to finish before closing the client.
   */
  public static void main(final String[] args)
      throws ExecutionException, InterruptedException, IOException, ParseException {

    // Each row pairs a URL with the value sent as echoData for that request.
    Object[][] input = {
      {"https://toscrape.com", 1},
      {"https://books.toscrape.com", 2},
      {"https://quotes.toscrape.com", 3}
    };
    List<Future<SimpleHttpResponse>> futures = new ArrayList<>();
    List<String> output = new ArrayList<>();

    int concurrency = 15;

    // https://issues.apache.org/jira/browse/HTTPCLIENT-2219
    final TlsStrategy tlsStrategy =
        ClientTlsStrategyBuilder.create()
            .useSystemProperties()
            .setTlsDetailsFactory(
                sslEngine ->
                    new TlsDetails(sslEngine.getSession(), sslEngine.getApplicationProtocol()))
            .build();

    PoolingAsyncClientConnectionManager connectionManager =
        PoolingAsyncClientConnectionManagerBuilder.create().setTlsStrategy(tlsStrategy).build();
    connectionManager.setMaxTotal(concurrency);
    connectionManager.setDefaultMaxPerRoute(concurrency);

    CloseableHttpAsyncClient client =
        HttpAsyncClients.custom().setConnectionManager(connectionManager).build();
    try {
      client.start();
      for (int i = 0; i < input.length; i++) {
        Map<String, Object> parameters =
            ImmutableMap.of("url", input[i][0], "browserHtml", true, "echoData", input[i][1]);
        String requestBody = new Gson().toJson(parameters);

        SimpleHttpRequest request =
            new SimpleHttpRequest("POST", "https://api.zyte.com/v1/extract");
        request.setHeader(HttpHeaders.AUTHORIZATION, buildAuthHeader());
        request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate");
        request.setBody(requestBody, ContentType.APPLICATION_JSON);

        final Future<SimpleHttpResponse> future =
            client.execute(
                request,
                new FutureCallback<SimpleHttpResponse>() {
                  public void completed(final SimpleHttpResponse response) {
                    String apiResponse = response.getBodyText();
                    output.add(apiResponse);
                  }

                  public void failed(final Exception ex) {}

                  public void cancelled() {}
                });
        futures.add(future);
      }
      // Block until every request has completed before closing the client.
      for (Future<SimpleHttpResponse> future : futures) {
        future.get();
      }
    } finally {
      client.close();
    }
  }

  /** Builds the HTTP Basic Authorization header value: API key as user name, empty password. */
  private static String buildAuthHeader() {
    String auth = API_KEY + ":";
    String encodedAuth = Base64.getEncoder().encodeToString(auth.getBytes());
    return "Basic " + encodedAuth;
  }
}
const { ConcurrencyManager } = require('axios-concurrency')
const axios = require('axios')

// Each entry pairs a URL to fetch with the value sent as echoData.
const urls = [
  ['https://toscrape.com', 1],
  ['https://books.toscrape.com', 2],
  ['https://quotes.toscrape.com', 3]
]
const output = []

// Limit the shared client to 15 concurrent requests.
const client = axios.create()
ConcurrencyManager(client, 15)

// Send one request and store its response data once it arrives.
const extract = ([url, echoData]) =>
  client
    .post(
      'https://api.zyte.com/v1/extract',
      { url, browserHtml: true, echoData },
      { auth: { username: 'YOUR_API_KEY' } }
    )
    .then(({ data }) => output.push(data))

Promise.all(urls.map((entry) => extract(entry)))
<?php

// Each entry pairs a URL to fetch with the value sent as echoData.
$input = [
    ['https://toscrape.com', 1],
    ['https://books.toscrape.com', 2],
    ['https://quotes.toscrape.com', 3],
];
$output = [];
$promises = [];

$client = new GuzzleHttp\Client();

foreach ($input as [$url, $index]) {
    $request = new \GuzzleHttp\Psr7\Request('POST', 'https://api.zyte.com/v1/extract');
    $request_options = [
        'auth' => ['YOUR_API_KEY', ''],
        'headers' => ['Accept-Encoding' => 'gzip'],
        'json' => [
            'url' => $url,
            'browserHtml' => true,
            'echoData' => $index,
        ],
    ];
    // Store each decoded response as soon as its request completes.
    $on_fulfilled = function ($response) {
        global $output;
        $output[] = json_decode($response->getBody());
    };
    global $promises;
    $promises[] = $client->sendAsync($request, $request_options)->then($on_fulfilled);
}

// Wait for every pending request before the script ends.
foreach ($promises as $pending) {
    $pending->wait();
}
import asyncio

import aiohttp

# Each entry pairs a URL to fetch with the value sent as echoData.
input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]
output = []


async def extract(client, url, index):
    """Send one browser request tagged with ``index`` and store the JSON response."""
    payload = {"url": url, "browserHtml": True, "echoData": index}
    response = await client.post(
        "https://api.zyte.com/v1/extract",
        json=payload,
        auth=aiohttp.BasicAuth("YOUR_API_KEY"),
    )
    response_data = await response.json()
    output.append(response_data)


async def main():
    """Run all requests concurrently over a session limited to 15 connections per host."""
    connector = aiohttp.TCPConnector(limit_per_host=15)
    async with aiohttp.ClientSession(connector=connector) as client:
        pending = [extract(client, url, index) for url, index in input_data]
        await asyncio.gather(*pending)


asyncio.run(main())
import asyncio

from zyte_api.aio.client import AsyncClient, create_session

# Each entry pairs a URL to fetch with the value sent as echoData.
input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]
output = []


async def main():
    """Send all requests in parallel and collect each response as it completes."""
    connection_count = 15
    client = AsyncClient(n_conn=connection_count)
    requests = []
    for url, index in input_data:
        requests.append({"url": url, "browserHtml": True, "echoData": index})
    async with create_session(connection_count) as session:
        pending = client.request_parallel_as_completed(
            requests,
            session=session,
        )
        for future in pending:
            result = await future
            output.append(result)


asyncio.run(main())
from scrapy import Request, Spider

input_data = [
    ("https://toscrape.com", 1),
    ("https://books.toscrape.com", 2),
    ("https://quotes.toscrape.com", 3),
]


class ToScrapeSpider(Spider):
    """Spider that requests every ``input_data`` URL with ``browserHtml`` enabled.

    Each request carries its index as ``echoData`` inside the
    ``zyte_api_automap`` meta key; ``parse`` reads the value back from the
    raw API response.
    """

    name = "toscrape_com"

    # Both concurrency settings are set to the same value.
    custom_settings = dict(
        CONCURRENT_REQUESTS=15,
        CONCURRENT_REQUESTS_PER_DOMAIN=15,
    )

    def start_requests(self):
        """Yield one ``Request`` per ``(url, index)`` pair in ``input_data``."""
        for target_url, position in input_data:
            api_params = {"browserHtml": True, "echoData": position}
            yield Request(target_url, meta={"zyte_api_automap": api_params})

    def parse(self, response):
        """Yield an item holding the echoed index and the response text."""
        echoed = response.raw_api_response["echoData"]
        page_html = response.text
        yield {"index": echoed, "html": page_html}

Alternatively, you can use Scrapy’s Request.cb_kwargs directly for a similar purpose:


    def start_requests(self):
        """Yield one ``Request`` per ``(url, index)`` pair in ``input_data``.

        The index is handed to the callback through ``cb_kwargs``, so the
        ``zyte_api_automap`` parameters only enable ``browserHtml``.
        """
        for target_url, position in input_data:
            api_params = {"browserHtml": True}
            callback_args = {"index": position}
            yield Request(
                target_url,
                cb_kwargs=callback_args,
                meta={"zyte_api_automap": api_params},
            )

    def parse(self, response, index):
        """Yield a single item pairing the ``index`` argument with the response text."""
        item = {"index": index}
        item["html"] = response.text
        yield item

Output:

{"url": "https://quotes.toscrape.com/", "statusCode": 200, "browserHtml": "<!DOCTYPE html><html lang=\"en\"><head>\n\t<meta charset=\"UTF-8\">\n\t<title>Quotes to Scrape</title>\n    <link rel=\"stylesheet\" href=\"/static/bootstrap.min.css\">\n    <link rel=\"stylesheet\" href=\"/static/main.css\">\n</head>\n<body>\n    <div class=\"container\">\n        <div class=\"row header-box\">\n            <div class=\"col-md-8\">\n                <h1>\n                    <a href=\"/\" style=\"text-decoration: none\">Quotes to Scrape</a>\n                </h1>\n            </div>\n            <div class=\"col-md-4\">\n                <p>\n                \n                    <a href=\"/login\">Login</a>\n                \n                </p>\n            </div>\n        </div>\n    \n\n<div class=\"row\">\n    <div class=\"col-md-8\">\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“The world as we have created it is a process of our thinking. 
It cannot be changed without changing our thinking.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Albert Einstein</small>\n        <a href=\"/author/Albert-Einstein\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"change,deep-thoughts,thinking,world\"> \n            \n            <a class=\"tag\" href=\"/tag/change/page/1/\">change</a>\n            \n            <a class=\"tag\" href=\"/tag/deep-thoughts/page/1/\">deep-thoughts</a>\n            \n            <a class=\"tag\" href=\"/tag/thinking/page/1/\">thinking</a>\n            \n            <a class=\"tag\" href=\"/tag/world/page/1/\">world</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“It is our choices, Harry, that show what we truly are, far more than our abilities.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">J.K. Rowling</small>\n        <a href=\"/author/J-K-Rowling\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"abilities,choices\"> \n            \n            <a class=\"tag\" href=\"/tag/abilities/page/1/\">abilities</a>\n            \n            <a class=\"tag\" href=\"/tag/choices/page/1/\">choices</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“There are only two ways to live your life. One is as though nothing is a miracle. 
The other is as though everything is a miracle.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Albert Einstein</small>\n        <a href=\"/author/Albert-Einstein\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"inspirational,life,live,miracle,miracles\"> \n            \n            <a class=\"tag\" href=\"/tag/inspirational/page/1/\">inspirational</a>\n            \n            <a class=\"tag\" href=\"/tag/life/page/1/\">life</a>\n            \n            <a class=\"tag\" href=\"/tag/live/page/1/\">live</a>\n            \n            <a class=\"tag\" href=\"/tag/miracle/page/1/\">miracle</a>\n            \n            <a class=\"tag\" href=\"/tag/miracles/page/1/\">miracles</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Jane Austen</small>\n        <a href=\"/author/Jane-Austen\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"aliteracy,books,classic,humor\"> \n            \n            <a class=\"tag\" href=\"/tag/aliteracy/page/1/\">aliteracy</a>\n            \n            <a class=\"tag\" href=\"/tag/books/page/1/\">books</a>\n            \n            <a class=\"tag\" href=\"/tag/classic/page/1/\">classic</a>\n            \n            <a class=\"tag\" href=\"/tag/humor/page/1/\">humor</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“Imperfection is beauty, madness is genius and it's better to be 
absolutely ridiculous than absolutely boring.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Marilyn Monroe</small>\n        <a href=\"/author/Marilyn-Monroe\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"be-yourself,inspirational\"> \n            \n            <a class=\"tag\" href=\"/tag/be-yourself/page/1/\">be-yourself</a>\n            \n            <a class=\"tag\" href=\"/tag/inspirational/page/1/\">inspirational</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“Try not to become a man of success. Rather become a man of value.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Albert Einstein</small>\n        <a href=\"/author/Albert-Einstein\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"adulthood,success,value\"> \n            \n            <a class=\"tag\" href=\"/tag/adulthood/page/1/\">adulthood</a>\n            \n            <a class=\"tag\" href=\"/tag/success/page/1/\">success</a>\n            \n            <a class=\"tag\" href=\"/tag/value/page/1/\">value</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“It is better to be hated for what you are than to be loved for what you are not.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">André Gide</small>\n        <a href=\"/author/Andre-Gide\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"life,love\"> \n            \n            <a class=\"tag\" 
href=\"/tag/life/page/1/\">life</a>\n            \n            <a class=\"tag\" href=\"/tag/love/page/1/\">love</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“I have not failed. I've just found 10,000 ways that won't work.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Thomas A. Edison</small>\n        <a href=\"/author/Thomas-A-Edison\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"edison,failure,inspirational,paraphrased\"> \n            \n            <a class=\"tag\" href=\"/tag/edison/page/1/\">edison</a>\n            \n            <a class=\"tag\" href=\"/tag/failure/page/1/\">failure</a>\n            \n            <a class=\"tag\" href=\"/tag/inspirational/page/1/\">inspirational</a>\n            \n            <a class=\"tag\" href=\"/tag/paraphrased/page/1/\">paraphrased</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“A woman is like a tea bag; you never know how strong it is until it's in hot water.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Eleanor Roosevelt</small>\n        <a href=\"/author/Eleanor-Roosevelt\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"misattributed-eleanor-roosevelt\"> \n            \n            <a class=\"tag\" href=\"/tag/misattributed-eleanor-roosevelt/page/1/\">misattributed-eleanor-roosevelt</a>\n            \n        </div>\n    </div>\n\n    <div class=\"quote\" itemscope=\"\" itemtype=\"http://schema.org/CreativeWork\">\n        <span class=\"text\" itemprop=\"text\">“A day without sunshine is like, you know, 
night.”</span>\n        <span>by <small class=\"author\" itemprop=\"author\">Steve Martin</small>\n        <a href=\"/author/Steve-Martin\">(about)</a>\n        </span>\n        <div class=\"tags\">\n            Tags:\n            <meta class=\"keywords\" itemprop=\"keywords\" content=\"humor,obvious,simile\"> \n            \n            <a class=\"tag\" href=\"/tag/humor/page/1/\">humor</a>\n            \n            <a class=\"tag\" href=\"/tag/obvious/page/1/\">obvious</a>\n            \n            <a class=\"tag\" href=\"/tag/simile/page/1/\">simile</a>\n            \n        </div>\n    </div>\n\n    <nav>\n        <ul class=\"pager\">\n            \n            \n            <li class=\"next\">\n                <a href=\"/page/2/\">Next <span aria-hidden=\"true\">→</span></a>\n            </li>\n            \n        </ul>\n    </nav>\n    </div>\n    <div class=\"col-md-4 tags-box\">\n        \n            <h2>Top Ten tags</h2>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 28px\" href=\"/tag/love/\">love</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 26px\" href=\"/tag/inspirational/\">inspirational</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 26px\" href=\"/tag/life/\">life</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 24px\" href=\"/tag/humor/\">humor</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 22px\" href=\"/tag/books/\">books</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 14px\" href=\"/tag/reading/\">reading</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            
<a class=\"tag\" style=\"font-size: 10px\" href=\"/tag/friendship/\">friendship</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 8px\" href=\"/tag/friends/\">friends</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 8px\" href=\"/tag/truth/\">truth</a>\n            </span>\n            \n            <span class=\"tag-item\">\n            <a class=\"tag\" style=\"font-size: 6px\" href=\"/tag/simile/\">simile</a>\n            </span>\n            \n        \n    </div>\n</div>\n\n    </div>\n    <footer class=\"footer\">\n        <div class=\"container\">\n            <p class=\"text-muted\">\n                Quotes by: <a href=\"https://www.goodreads.com/quotes\">GoodReads.com</a>\n            </p>\n            <p class=\"copyright\">\n                Made with <span class=\"zyte\">❤</span> by <a class=\"zyte\" href=\"https://www.zyte.com\">Zyte</a>\n            </p>\n        </div>\n    </footer>\n\n</body></html>", "echoData": 3}
{"url": "https://books.toscrape.com/", "statusCode": 200, "browserHtml": "<!DOCTYPE html><!--[if lt IE 7]>      <html lang=\"en-us\" class=\"no-js lt-ie9 lt-ie8 lt-ie7\"> <![endif]--><!--[if IE 7]>         <html lang=\"en-us\" class=\"no-js lt-ie9 lt-ie8\"> <![endif]--><!--[if IE 8]>         <html lang=\"en-us\" class=\"no-js lt-ie9\"> <![endif]--><!--[if gt IE 8]><!--><html lang=\"en-us\" class=\"no-js\"><!--<![endif]--><head>\n        <title>\n    All products | Books to Scrape - Sandbox\n</title>\n\n        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">\n        <meta name=\"created\" content=\"24th Jun 2016 09:29\">\n        <meta name=\"description\" content=\"\">\n        <meta name=\"viewport\" content=\"width=device-width\">\n        <meta name=\"robots\" content=\"NOARCHIVE,NOCACHE\">\n\n        <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->\n        <!--[if lt IE 9]>\n        <script src=\"//html5shim.googlecode.com/svn/trunk/html5.js\"></script>\n        <![endif]-->\n\n        \n            <link rel=\"shortcut icon\" href=\"static/oscar/favicon.ico\">\n        \n\n        \n        \n    \n    \n        <link rel=\"stylesheet\" type=\"text/css\" href=\"static/oscar/css/styles.css\">\n    \n    <link rel=\"stylesheet\" href=\"static/oscar/js/bootstrap-datetimepicker/bootstrap-datetimepicker.css\">\n    <link rel=\"stylesheet\" type=\"text/css\" href=\"static/oscar/css/datetimepicker.css\">\n\n\n        \n        \n\n        \n\n        \n            \n            \n\n        \n    </head>\n\n    <body id=\"default\" class=\"default\">\n        \n        \n    \n    \n    <header class=\"header container-fluid\">\n        <div class=\"page_inner\">\n            <div class=\"row\">\n                <div class=\"col-sm-8 h1\"><a href=\"index.html\">Books to Scrape</a><small> We love being scraped!</small>\n</div>\n\n                \n            </div>\n        </div>\n    </header>\n\n    \n    \n<div 
class=\"container-fluid page\">\n    <div class=\"page_inner\">\n        \n    <ul class=\"breadcrumb\">\n        <li>\n            <a href=\"index.html\">Home</a>\n        </li>\n        <li class=\"active\">All products</li>\n    </ul>\n\n        <div class=\"row\">\n\n            <aside class=\"sidebar col-sm-4 col-md-3\">\n                \n                <div id=\"promotions_left\">\n                    \n                </div>\n                \n    \n    \n        \n        <div class=\"side_categories\">\n            <ul class=\"nav nav-list\">\n                \n                    <li>\n                        <a href=\"catalogue/category/books_1/index.html\">\n                            \n                                Books\n                            \n                        </a>\n\n                        <ul>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/travel_2/index.html\">\n                            \n                                Travel\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/mystery_3/index.html\">\n                            \n                                Mystery\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/historical-fiction_4/index.html\">\n                            \n                                Historical Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/sequential-art_5/index.html\">\n                       
     \n                                Sequential Art\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/classics_6/index.html\">\n                            \n                                Classics\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/philosophy_7/index.html\">\n                            \n                                Philosophy\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/romance_8/index.html\">\n                            \n                                Romance\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/womens-fiction_9/index.html\">\n                            \n                                Womens Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/fiction_10/index.html\">\n                            \n                                Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/childrens_11/index.html\">\n                            \n                                Childrens\n         
                   \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/religion_12/index.html\">\n                            \n                                Religion\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/nonfiction_13/index.html\">\n                            \n                                Nonfiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/music_14/index.html\">\n                            \n                                Music\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/default_15/index.html\">\n                            \n                                Default\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/science-fiction_16/index.html\">\n                            \n                                Science Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/sports-and-games_17/index.html\">\n                            \n                                Sports and Games\n                            \n                        
</a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/add-a-comment_18/index.html\">\n                            \n                                Add a comment\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/fantasy_19/index.html\">\n                            \n                                Fantasy\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/new-adult_20/index.html\">\n                            \n                                New Adult\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/young-adult_21/index.html\">\n                            \n                                Young Adult\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/science_22/index.html\">\n                            \n                                Science\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/poetry_23/index.html\">\n                            \n                                Poetry\n                            \n                        </a>\n\n                        </li>\n                        
\n                \n                    <li>\n                        <a href=\"catalogue/category/books/paranormal_24/index.html\">\n                            \n                                Paranormal\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/art_25/index.html\">\n                            \n                                Art\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/psychology_26/index.html\">\n                            \n                                Psychology\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/autobiography_27/index.html\">\n                            \n                                Autobiography\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/parenting_28/index.html\">\n                            \n                                Parenting\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/adult-fiction_29/index.html\">\n                            \n                                Adult Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n        
                <a href=\"catalogue/category/books/humor_30/index.html\">\n                            \n                                Humor\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/horror_31/index.html\">\n                            \n                                Horror\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/history_32/index.html\">\n                            \n                                History\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/food-and-drink_33/index.html\">\n                            \n                                Food and Drink\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/christian-fiction_34/index.html\">\n                            \n                                Christian Fiction\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/business_35/index.html\">\n                            \n                                Business\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a 
href=\"catalogue/category/books/biography_36/index.html\">\n                            \n                                Biography\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/thriller_37/index.html\">\n                            \n                                Thriller\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/contemporary_38/index.html\">\n                            \n                                Contemporary\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/spirituality_39/index.html\">\n                            \n                                Spirituality\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/academic_40/index.html\">\n                            \n                                Academic\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/self-help_41/index.html\">\n                            \n                                Self Help\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a 
href=\"catalogue/category/books/historical_42/index.html\">\n                            \n                                Historical\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/christian_43/index.html\">\n                            \n                                Christian\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/suspense_44/index.html\">\n                            \n                                Suspense\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/short-stories_45/index.html\">\n                            \n                                Short Stories\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/novels_46/index.html\">\n                            \n                                Novels\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/health_47/index.html\">\n                            \n                                Health\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/politics_48/index.html\">\n       
                     \n                                Politics\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/cultural_49/index.html\">\n                            \n                                Cultural\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/erotica_50/index.html\">\n                            \n                                Erotica\n                            \n                        </a>\n\n                        </li>\n                        \n                \n                    <li>\n                        <a href=\"catalogue/category/books/crime_51/index.html\">\n                            \n                                Crime\n                            \n                        </a>\n\n                        </li>\n                        \n                            </ul></li>\n                        \n                \n            </ul>\n        </div>\n    \n    \n\n            </aside>\n\n            <div class=\"col-sm-8 col-md-9\">\n                \n                <div class=\"page-header action\">\n                    <h1>All products</h1>\n                </div>\n                \n\n                \n\n\n\n<div id=\"messages\">\n\n</div>\n\n\n                <div id=\"promotions\">\n                    \n                </div>\n\n                \n    <form method=\"get\" class=\"form-horizontal\">\n        \n        <div style=\"display:none\">\n            \n            \n        </div>\n\n        \n            \n                \n                    <strong>1000</strong> results - showing <strong>1</strong> to <strong>20</strong>.\n                \n            \n       
     \n        \n    </form>\n    \n        <section>\n            <div class=\"alert alert-warning\" role=\"alert\"><strong>Warning!</strong> This is a demo website for web scraping purposes. Prices and ratings here were randomly assigned and have no real meaning.</div>\n\n            <div>\n                <ol class=\"row\">\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/a-light-in-the-attic_1000/index.html\"><img src=\"media/cache/2c/da/2cdad67c44b002e7ead0cc35693c0e8b.jpg\" alt=\"A Light in the Attic\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Three\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/a-light-in-the-attic_1000/index.html\" title=\"A Light in the Attic\">A Light in the ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£51.77</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 
col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/tipping-the-velvet_999/index.html\"><img src=\"media/cache/26/0c/260c6ae16bce31c8f8c95daddd9f4a1c.jpg\" alt=\"Tipping the Velvet\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/tipping-the-velvet_999/index.html\" title=\"Tipping the Velvet\">Tipping the Velvet</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£53.74</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/soumission_998/index.html\"><img src=\"media/cache/3e/ef/3eef99c9d9adef34639f510662022830.jpg\" alt=\"Soumission\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n        
        <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/soumission_998/index.html\" title=\"Soumission\">Soumission</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£50.10</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/sharp-objects_997/index.html\"><img src=\"media/cache/32/51/3251cf3a3412f53f339e42cac2134093.jpg\" alt=\"Sharp Objects\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/sharp-objects_997/index.html\" title=\"Sharp Objects\">Sharp Objects</a></h3>\n        \n\n      
  \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£47.82</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/sapiens-a-brief-history-of-humankind_996/index.html\"><img src=\"media/cache/be/a5/bea5697f2534a2f86a3ef27b5a8c12a6.jpg\" alt=\"Sapiens: A Brief History of Humankind\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/sapiens-a-brief-history-of-humankind_996/index.html\" title=\"Sapiens: A Brief History of Humankind\">Sapiens: A Brief History ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£54.23</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn 
btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-requiem-red_995/index.html\"><img src=\"media/cache/68/33/68339b4c9bc034267e1da611ab3b34f8.jpg\" alt=\"The Requiem Red\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-requiem-red_995/index.html\" title=\"The Requiem Red\">The Requiem Red</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£22.65</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a 
href=\"catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html\"><img src=\"media/cache/92/27/92274a95b7c251fea59a2b8a78275ab4.jpg\" alt=\"The Dirty Little Secrets of Getting Your Dream Job\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-dirty-little-secrets-of-getting-your-dream-job_994/index.html\" title=\"The Dirty Little Secrets of Getting Your Dream Job\">The Dirty Little Secrets ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£33.34</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/index.html\"><img src=\"media/cache/3d/54/3d54940e57e662c4dd1f3ff00c78cc64.jpg\" alt=\"The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull\" 
class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Three\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-coming-woman-a-novel-based-on-the-life-of-the-infamous-feminist-victoria-woodhull_993/index.html\" title=\"The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull\">The Coming Woman: A ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£17.93</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-boys-in-the-boat-nine-americans-and-their-epic-quest-for-gold-at-the-1936-berlin-olympics_992/index.html\"><img src=\"media/cache/66/88/66883b91f6804b2323c8369331cb7dd1.jpg\" alt=\"The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p 
class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-boys-in-the-boat-nine-americans-and-their-epic-quest-for-gold-at-the-1936-berlin-olympics_992/index.html\" title=\"The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics\">The Boys in the ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£22.60</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/the-black-maria_991/index.html\"><img src=\"media/cache/58/46/5846057e28022268153beff6d352b06c.jpg\" alt=\"The Black Maria\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                
</p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/the-black-maria_991/index.html\" title=\"The Black Maria\">The Black Maria</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£52.15</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/starving-hearts-triangular-trade-trilogy-1_990/index.html\"><img src=\"media/cache/be/f4/bef44da28c98f905a3ebec0b87be8530.jpg\" alt=\"Starving Hearts (Triangular Trade Trilogy, #1)\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Two\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/starving-hearts-triangular-trade-trilogy-1_990/index.html\" title=\"Starving Hearts (Triangular Trade Trilogy, #1)\">Starving Hearts (Triangular Trade ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£13.99</p>\n    
\n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/shakespeares-sonnets_989/index.html\"><img src=\"media/cache/10/48/1048f63d3b5061cd2f424d20b3f9b666.jpg\" alt=\"Shakespeare's Sonnets\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Four\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/shakespeares-sonnets_989/index.html\" title=\"Shakespeare's Sonnets\">Shakespeare's Sonnets</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£20.66</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n             
           <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/set-me-free_988/index.html\"><img src=\"media/cache/5b/88/5b88c52633f53cacf162c15f4f823153.jpg\" alt=\"Set Me Free\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/set-me-free_988/index.html\" title=\"Set Me Free\">Set Me Free</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£17.46</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/scott-pilgrims-precious-little-life-scott-pilgrim-1_987/index.html\"><img src=\"media/cache/94/b1/94b1b8b244bce9677c2f29ccc890d4d2.jpg\" alt=\"Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)\" class=\"thumbnail\"></a>\n  
                  \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/scott-pilgrims-precious-little-life-scott-pilgrim-1_987/index.html\" title=\"Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)\">Scott Pilgrim's Precious Little ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£52.29</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/rip-it-up-and-start-again_986/index.html\"><img src=\"media/cache/81/c4/81c4a973364e17d01f217e1188253d5e.jpg\" alt=\"Rip it Up and Start Again\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Five\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n  
                  <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/rip-it-up-and-start-again_986/index.html\" title=\"Rip it Up and Start Again\">Rip it Up and ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£35.02</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/our-band-could-be-your-life-scenes-from-the-american-indie-underground-1981-1991_985/index.html\"><img src=\"media/cache/54/60/54607fe8945897cdcced0044103b10b6.jpg\" alt=\"Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Three\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/our-band-could-be-your-life-scenes-from-the-american-indie-underground-1981-1991_985/index.html\" title=\"Our Band Could Be Your Life: Scenes from the American 
Indie Underground, 1981-1991\">Our Band Could Be ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£57.25</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/olio_984/index.html\"><img src=\"media/cache/55/33/553310a7162dfbc2c6d19a84da0df9e1.jpg\" alt=\"Olio\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/olio_984/index.html\" title=\"Olio\">Olio</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£23.88</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to 
basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/mesaerion-the-best-science-fiction-stories-1800-1849_983/index.html\"><img src=\"media/cache/09/a3/09a3aef48557576e1a85ba7efea8ecb7.jpg\" alt=\"Mesaerion: The Best Science Fiction Stories 1800-1849\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating One\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/mesaerion-the-best-science-fiction-stories-1800-1849_983/index.html\" title=\"Mesaerion: The Best Science Fiction Stories 1800-1849\">Mesaerion: The Best Science ...</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£37.59</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div 
class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/libertarianism-for-beginners_982/index.html\"><img src=\"media/cache/0b/bc/0bbcd0a6f4bcd81ccb1049a52736406e.jpg\" alt=\"Libertarianism for Beginners\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p class=\"star-rating Two\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/libertarianism-for-beginners_982/index.html\" title=\"Libertarianism for Beginners\">Libertarianism for Beginners</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£51.33</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                        <li class=\"col-xs-6 col-sm-4 col-md-3 col-lg-3\">\n\n\n\n\n\n\n    <article class=\"product_pod\">\n        \n            <div class=\"image_container\">\n                \n                    \n                    <a href=\"catalogue/its-only-the-himalayas_981/index.html\"><img src=\"media/cache/27/a5/27a53d0bb95bdd88288eaf66c9230d7e.jpg\" alt=\"It's Only the Himalayas\" class=\"thumbnail\"></a>\n                    \n                \n            </div>\n        \n\n        \n            \n                <p 
class=\"star-rating Two\">\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                    <i class=\"icon-star\"></i>\n                </p>\n            \n        \n\n        \n            <h3><a href=\"catalogue/its-only-the-himalayas_981/index.html\" title=\"It's Only the Himalayas\">It's Only the Himalayas</a></h3>\n        \n\n        \n            <div class=\"product_price\">\n                \n\n\n\n\n\n\n    \n        <p class=\"price_color\">£45.17</p>\n    \n\n<p class=\"instock availability\">\n    <i class=\"icon-ok\"></i>\n    \n        In stock\n    \n</p>\n\n                \n                    \n\n\n\n\n\n\n    \n    <form>\n        <button type=\"submit\" class=\"btn btn-primary btn-block\" data-loading-text=\"Adding...\">Add to basket</button>\n    </form>\n\n\n                \n            </div>\n        \n    </article>\n\n</li>\n                    \n                </ol>\n                \n\n\n\n    <div>\n        <ul class=\"pager\">\n            \n            <li class=\"current\">\n            \n                Page 1 of 50\n            \n            </li>\n            \n                <li class=\"next\"><a href=\"catalogue/page-2.html\">next</a></li>\n            \n        </ul>\n    </div>\n\n\n            </div>\n        </section>\n    \n\n\n            </div>\n\n        </div><!-- /row -->\n    </div><!-- /page_inner -->\n</div><!-- /container-fluid -->\n\n\n    \n<footer class=\"footer container-fluid\">\n    \n        \n    \n</footer>\n\n\n        \n        \n  \n            <!-- jQuery -->\n            <script src=\"http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js\"></script>\n            <script>window.jQuery || document.write('<script src=\"static/oscar/js/jquery/jquery-1.9.1.min.js\"><\\/script>')</script><script 
src=\"static/oscar/js/jquery/jquery-1.9.1.min.js\"></script>\n        \n  \n\n\n        \n        \n    \n        \n    <!-- Twitter Bootstrap -->\n    <script type=\"text/javascript\" src=\"static/oscar/js/bootstrap3/bootstrap.min.js\"></script>\n    <!-- Oscar -->\n    <script src=\"static/oscar/js/oscar/ui.js\" type=\"text/javascript\" charset=\"utf-8\"></script>\n\n    <script src=\"static/oscar/js/bootstrap-datetimepicker/bootstrap-datetimepicker.js\" type=\"text/javascript\" charset=\"utf-8\"></script>\n    <script src=\"static/oscar/js/bootstrap-datetimepicker/locales/bootstrap-datetimepicker.all.js\" type=\"text/javascript\" charset=\"utf-8\"></script>\n\n\n        \n        \n    \n\n    \n\n\n        \n        <script type=\"text/javascript\">\n            $(function() {\n                \n    \n    \n    oscar.init();\n\n    oscar.search.init();\n\n            });\n        </script>\n\n        \n        <!-- Version: N/A -->\n        \n    \n\n</body></html>", "echoData": 2}
{"url": "https://toscrape.com/", "statusCode": 200, "browserHtml": "<!DOCTYPE html><html lang=\"en\"><head>\n        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n        <title>Scraping Sandbox</title>\n        <link href=\"./css/bootstrap.min.css\" rel=\"stylesheet\">\n        <link href=\"./css/main.css\" rel=\"stylesheet\">\n    </head>\n    <body>\n        <div class=\"container\">\n            <div class=\"row\">\n                <div class=\"col-md-1\"></div>\n                <div class=\"col-md-10 well\">\n                    <img class=\"logo\" src=\"img/zyte.png\" width=\"200px\">\n                    <h1 class=\"text-right\">Web Scraping Sandbox</h1>\n                </div>\n            </div>\n\n            <div class=\"row\">\n                <div class=\"col-md-1\"></div>\n                <div class=\"col-md-10\">\n                    <h2>Books</h2>\n                    <p>A <a href=\"http://books.toscrape.com\">fictional bookstore</a> that desperately wants to be scraped. It's a safe place for beginners learning web scraping and for developers validating their scraping technologies as well. 
Available at: <a href=\"http://books.toscrape.com\">books.toscrape.com</a></p>\n                    <div class=\"col-md-6\">\n                        <a href=\"http://books.toscrape.com\"><img src=\"./img/books.png\" class=\"img-thumbnail\"></a>\n                    </div>\n                    <div class=\"col-md-6\">\n                        <table class=\"table table-hover\">\n                            <tbody><tr><th colspan=\"2\">Details</th></tr>\n                            <tr><td>Amount of items </td><td>1000</td></tr>\n                            <tr><td>Pagination </td><td>✔</td></tr>\n                            <tr><td>Items per page </td><td>max 20</td></tr>\n                            <tr><td>Requires JavaScript </td><td>✘</td></tr>\n                        </tbody></table>\n                    </div>\n                </div>\n            </div>\n\n            <div class=\"row\">\n                <div class=\"col-md-1\"></div>\n                <div class=\"col-md-10\">\n                    <h2>Quotes</h2>\n                    <p><a href=\"http://quotes.toscrape.com/\">A website</a> that lists quotes from famous people. 
It has many endpoints showing the quotes in many different ways, each of them including new scraping challenges for you, as described below.</p>\n                    <div class=\"col-md-6\">\n                        <a href=\"http://quotes.toscrape.com\"><img src=\"./img/quotes.png\" class=\"img-thumbnail\"></a>\n                    </div>\n                    <div class=\"col-md-6\">\n                        <table class=\"table table-hover\">\n                            <tbody><tr><th colspan=\"2\">Endpoints</th></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/\">Default</a></td><td>Microdata and pagination</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/scroll\">Scroll</a> </td><td>infinite scrolling pagination</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/js\">JavaScript</a> </td><td>JavaScript generated content</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/js-delayed\">Delayed</a> </td><td>Same as JavaScript but with a delay (?delay=10000)</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/tableful\">Tableful</a> </td><td>a table based messed-up layout</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/login\">Login</a> </td><td>login with CSRF token (any user/passwd works)</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/search.aspx\">ViewState</a> </td><td>an AJAX based filter form with ViewStates</td></tr>\n                            <tr><td><a href=\"http://quotes.toscrape.com/random\">Random</a> </td><td>a single random quote</td></tr>\n                        </tbody></table>\n                    </div>\n                </div>\n            </div>\n        </div>\n    \n\n</body></html>", "echoData": 1}