Rationale
The URL explorer returns information about pages matching your criteria, and for each page returns the selected fields.
Making Requests
Query the URL explorer with a GET request at
/api/crawls/{crawl_id}/url-explorer.
A URL explorer query is formed by:
-
One or more filters. Each filter selects a set of pages and restricts the scope of pages to return. If more than one filter is given, then the final scope of pages is the conjunction of all filters. A list of available filters can be obtained at
/api/projects/{project_id}/quickfilters
-
A comma separated list of fields to return for each page. The list of available fields depends on each crawl and can be obtained at
/api/crawls/{crawl_id}/querybuilder/fields
-
Pagination controls: number of items to return, offset in the list, sort order.
Examples
List of all crawl pages, with a default set of fields
# Example: list every crawled page with the default set of fields.
# Requires a valid API token with crawl read access.
import json

import requests

token = 'TOKEN'
crawl_id = 'CRAWL-ID'
url = 'https://app.oncrawl.com/api/crawls/{}/url-explorer'.format(crawl_id)
# The 'crawled_pages' quickfilter restricts results to pages actually fetched.
resp = requests.get(
    url,
    headers={'x-oncrawl-token': token},
    params=dict(base_filters='crawled_pages'),
)
# print() is required on Python 3 — the original Python 2 `print expr`
# statement is a SyntaxError there.
print(json.dumps(resp.json(), indent=2, sort_keys=True))
Below is the JSON response to this request:
{
"meta": {
"columns": [
"url",
"inrank",
"status_code",
"meta_robots"
],
"in_cr": false,
"total_hits": 307,
"total_pages": 31
},
"oql": {
"field": [
"fetched",
"equals",
true
]
},
"urls": [
{
"inrank": 10,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/"
},
{
"inrank": 9,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/features/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/2016-link-building-state-of-play/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/3-seo-tools-try-2016/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/pagination-and-seo/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/seo-elements-to-check-when-launching-your-new-website/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/blog/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/chrome-extension/"
},
{
"inrank": 8,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/contact/"
},
{
"inrank": 8,
"meta_robots": null,
"status_code": 200,
"url": "http://www.oncrawl.com/about/"
}
]
}
List of pages having near duplicate content issues, ordered by group
# Example: list pages flagged as near-duplicate content, 5 at a time,
# sorted by duplicate cluster in descending order.
import json

import requests

token = 'TOKEN'
crawl_id = 'CRAWL-ID'
url = 'https://app.oncrawl.com/api/crawls/{}/url-explorer'.format(crawl_id)
params = dict(
    limit=5,                                           # pagination: page size
    base_filters='nearduplicate_content:true',         # only near-duplicate pages
    fields='url,inrank,status_code,meta_robots,clusters',
    sort='clusters:desc',                              # group duplicates together
)
resp = requests.get(url, headers={'x-oncrawl-token': token}, params=params)
# print() is required on Python 3 — the original Python 2 `print expr`
# statement is a SyntaxError there.
print(json.dumps(resp.json(), indent=2, sort_keys=True))
Below is the JSON response to this request:
{
"meta": {
"columns": [
"url",
"inrank",
"status_code",
"meta_robots",
"clusters"
],
"in_cr": false,
"total_hits": 2,
"total_pages": 1
},
"oql": {
"field": [
"nearduplicate_content",
"equals",
"true"
]
},
"urls": [
{
"clusters": "th0_ef1be8d0be2723d5d2b95eaef011283fc219f3f0967f37352d39615addbbbcbe",
"inrank": 4,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/blog/page/9/"
},
{
"clusters": "th0_ef1be8d0be2723d5d2b95eaef011283fc219f3f0967f37352d39615addbbbcbe",
"inrank": 3,
"meta_robots": "noodp",
"status_code": 200,
"url": "http://www.oncrawl.com/blog/page/7/"
}
]
}