-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
3,695 additions
and
1,337 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"env": { | ||
"node": true | ||
}, | ||
"extends": "airbnb/base", | ||
"parserOptions": { | ||
"sourceType": "script", | ||
"ecmaVersion": 2022 | ||
}, | ||
"globals": { | ||
"window": true, | ||
"document": true | ||
}, | ||
"rules": { | ||
"max-len": 0, | ||
"arrow-parens": 0, | ||
"no-console": 0, | ||
"no-await-in-loop": 0, | ||
"object-curly-newline": 0, | ||
"no-restricted-syntax": [ | ||
"error", | ||
{ | ||
"selector": "ForInStatement", | ||
"message": "for..in loops iterate over the entire prototype chain, which is virtually never what you want. Use Object.{keys,values,entries}, and iterate over the resulting array." | ||
}, | ||
{ | ||
"selector": "LabeledStatement", | ||
"message": "Labels are a form of GOTO; using them makes code confusing and hard to maintain and understand." | ||
}, | ||
{ | ||
"selector": "WithStatement", | ||
"message": "`with` is disallowed in strict mode because it makes code impossible to predict and optimize." | ||
} | ||
] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,79 +1,72 @@ | ||
const puppeteer = require('puppeteer'); | ||
const cheerio = require('cheerio'); | ||
const url = 'https://sherrilltree.com/reecoil-full-reach-chainsaw-lanyard/'; | ||
//https://www.treestuff.com/reecoil-big-boss-lanyard/ | ||
//https://www.treestuff.com/reecoil-full-reach-chainsaw-lanyard/ | ||
//https://www.treestuff.com/reecoil-se-full-reach-chainsaw-lanyard-pink/ | ||
//https://sherrilltree.com/reecoil-full-reach-chainsaw-lanyard/ | ||
const selectors = { | ||
reviews:"div.yotpo-reviews", | ||
name:"span.yotpo-user-name", | ||
rating:"div.yotpo-review-stars span.sr-only", | ||
title:"div.yotpo-main div.content-title", | ||
desc:"div.content-review", | ||
date:"span.yotpo-review-date" | ||
} | ||
|
||
(async () => { | ||
const browser = await puppeteer.launch({headless:true,args: ["--no-sandbox"]}); | ||
const url = 'https://sherrilltree.com/samson-stable-braid-rigging-rope-1-2in/'; | ||
|
||
// CSS selectors handed to Puppeteer and to document.querySelector*() inside the page. | ||
// The object is passed into page.evaluate() as an argument because the page context | ||
// cannot see Node-side variables. | ||
const selectors = { | ||
// NOTE(review): `yotpo` is not referenced in the visible code — confirm it is still needed. | ||
yotpo: 'div.yotpo.yotpo-main-widget', | ||
// Waited on after navigation before any scraping starts. | ||
reviews: 'div.yotpo-reviews', | ||
// Each match is treated as one review when collecting a page of results. | ||
review: 'div.yotpo-review', | ||
// Per-review fields; the text content of the r-th match of each is read and trimmed. | ||
name: 'span.yotpo-user-name', | ||
rating: 'div.yotpo-review-stars span.sr-only', | ||
title: 'div.yotpo-main div.content-title', | ||
desc: 'div.content-review', | ||
date: 'span.yotpo-review-date', | ||
// Pager element whose data-total and data-per-page attributes give the review counts. | ||
pager: 'div.yotpo-pager[data-total]', | ||
// Link clicked to move to the next page of reviews. | ||
next: 'div.yotpo-pager a[rel^=next]', | ||
}; | ||
|
||
const browser = await puppeteer.launch({ headless: false, args: ['--no-sandbox'] }); | ||
const page = await browser.newPage(); | ||
await page.setViewport({ | ||
width: 1280, | ||
height: 1024 | ||
}) | ||
await page.goto(url, { | ||
waitUntil: 'networkidle2' | ||
height: 1024, | ||
}); | ||
await page.goto(url, { waitUntil: 'networkidle0' }); | ||
|
||
await page.waitForSelector(selectors.reviews) | ||
|
||
//await page.waitFor(10000) | ||
|
||
const html = await page.evaluate(() => { | ||
return document.querySelector(selectors.reviews).innerHTML | ||
}); | ||
await page.waitForSelector(selectors.reviews); | ||
|
||
var $ = cheerio.load(html); | ||
// const html = await page.evaluate(selector => document.querySelector(selector.reviews).innerHTML, selectors); | ||
|
||
let reviewsObj = {} | ||
const reviewsTotal = await page.evaluate(selector => document.querySelector(selector.pager).getAttribute('data-total'), selectors); | ||
const reviewsPerPage = await page.evaluate(selector => document.querySelector(selector.pager).getAttribute('data-per-page'), selectors); | ||
const reviewsPages = Math.ceil(reviewsTotal / reviewsPerPage); | ||
console.log('Total reviews:', reviewsTotal); | ||
console.log('Reviews per page:', reviewsPerPage); | ||
console.log('Pages:', reviewsPages); | ||
|
||
//console.log(reviews) | ||
$("div.yotpo-pager a.goTo").each(async (index, elem) => { | ||
console.log($(this).text().trim()); | ||
var $ = cheerio.load(html); | ||
$('div.yotpo-review').each(function(i, el) { | ||
//span.yotpo-user-name | ||
reviewNumber = i*index; | ||
reviewsObj[reviewNumber] = { | ||
name : $(this).find(selectors.name).text().trim(), | ||
rating : $(this).find(selectors.rating).text().trim(), | ||
title : $(this).find(selectors.title).text().trim(), | ||
desc : $(this).find(selectors.desc).text().trim(), | ||
date : $(this).find(selectors.date).text().trim() | ||
var reviewsArr = []; | ||
// while() { 'div.yotpo-pager a:not[.yotpo-disabled]' } | ||
for (let p = 1; p < reviewsPages + 1; p += 1) { | ||
console.log('Getting page:', p); | ||
const d = await page.evaluate((selector, p) => { | ||
const reviews = document.querySelectorAll(selector.review); | ||
let data = []; | ||
for (let r = 0; r < reviews.length; r += 1) { | ||
// const reviewNumber = data.length + 1; | ||
data.push({ | ||
name: document.querySelectorAll(selector.name)[r].textContent.trim(), | ||
rating: document.querySelectorAll(selector.rating)[r].textContent.trim(), | ||
title: document.querySelectorAll(selector.title)[r].textContent.trim(), | ||
desc: document.querySelectorAll(selector.desc)[r].textContent.trim(), | ||
date: document.querySelectorAll(selector.date)[r].textContent.trim(), | ||
}); | ||
} | ||
}); | ||
console.log(reviewsObj); | ||
//$("div.yotpo-pager").find("a.yotpo-active").attr("data-page") | ||
//$("div.yotpo-pager").find("a.yotpo-active").href() | ||
$("div.yotpo-pager a[rel='next']").click(); | ||
await page.waitForSelector(selectors.name); | ||
}); | ||
|
||
return data; | ||
}, selectors, p); | ||
|
||
/*const reviews = await page.evaluate(() => { | ||
return document.querySelector('div.yotpo-review').innerHTML; | ||
});*/ | ||
reviewsArr = [...reviewsArr, ...d]; | ||
|
||
//for each yotpo-review | ||
//for (let i = 0; i < reviews.length; i++) { | ||
// const review = await (await reviews[i]); | ||
// console.log(review); | ||
//} | ||
// if not last page in pagination | ||
if (!reviewsPages.length + 1 === p) { | ||
// await page.waitFor(2000); | ||
|
||
await page.click(selectors.next); | ||
await page.waitForSelector(selectors.name); | ||
} | ||
} | ||
|
||
//console.log(textContent); | ||
|
||
console.log('Got', reviewsArr.length, 'reviews'); | ||
|
||
await browser.close(); | ||
|
||
})(); |
Oops, something went wrong.