Skip to content

Commit

Permalink
Rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
Luen committed Aug 17, 2022
1 parent 9e3527e commit 52e016c
Show file tree
Hide file tree
Showing 4 changed files with 3,695 additions and 1,337 deletions.
36 changes: 36 additions & 0 deletions .eslintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
  "env": {
    "node": true
  },
  "extends": "airbnb/base",
  "parserOptions": {
    "sourceType": "script",
    "ecmaVersion": 2022
  },
  "globals": {
    "window": true,
    "document": true
  },
  "rules": {
    "max-len": 0,
    "arrow-parens": 0,
    "no-console": 0,
    "no-await-in-loop": 0,
    "object-curly-newline": 0,
    "no-restricted-syntax": [
      "error",
      {
        "selector": "ForInStatement",
        "message": "for..in loops iterate over the entire prototype chain, which is virtually never what you want. Use Object.{keys,values,entries}, and iterate over the resulting array."
      },
      {
        "selector": "LabeledStatement",
        "message": "Labels are a form of GOTO; using them makes code confusing and hard to maintain and understand."
      },
      {
        "selector": "WithStatement",
        "message": "`with` is disallowed in strict mode because it makes code impossible to predict and optimize."
      }
    ]
  }
}
113 changes: 53 additions & 60 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,79 +1,72 @@
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
// Product page whose Yotpo review widget is scraped.
// Other product pages that were used with this script:
//   https://www.treestuff.com/reecoil-big-boss-lanyard/
//   https://www.treestuff.com/reecoil-full-reach-chainsaw-lanyard/
//   https://www.treestuff.com/reecoil-se-full-reach-chainsaw-lanyard-pink/
//   https://sherrilltree.com/reecoil-full-reach-chainsaw-lanyard/
const url = 'https://sherrilltree.com/samson-stable-braid-rigging-rope-1-2in/';

// CSS selectors for the pieces of the Yotpo widget markup this script reads.
// The object is passed into the page context as a whole, so it must stay
// JSON-serializable (plain strings only).
const selectors = {
  yotpo: 'div.yotpo.yotpo-main-widget',
  reviews: 'div.yotpo-reviews',
  review: 'div.yotpo-review',
  name: 'span.yotpo-user-name',
  rating: 'div.yotpo-review-stars span.sr-only',
  title: 'div.yotpo-main div.content-title',
  desc: 'div.content-review',
  date: 'span.yotpo-review-date',
  pager: 'div.yotpo-pager[data-total]',
  next: 'div.yotpo-pager a[rel^=next]',
};

/**
 * Reads the review totals from the pager element and derives the page count.
 *
 * When no pager element matches, the widget is treated as a single page
 * (NOTE(review): assumes the widget omits the pager when everything fits on
 * one page - confirm against a product with few reviews).
 *
 * @param {import('puppeteer').Page} page - Page that has the widget loaded.
 * @returns {Promise<{total: ?number, perPage: ?number, pages: number}>}
 * @throws {Error} If the pager attributes are not usable integers.
 */
async function readPagination(page) {
  const pager = await page.evaluate((selector) => {
    const el = document.querySelector(selector.pager);
    if (el === null) {
      return null;
    }
    return {
      total: el.getAttribute('data-total'),
      perPage: el.getAttribute('data-per-page'),
    };
  }, selectors);

  if (pager === null) {
    return { total: null, perPage: null, pages: 1 };
  }

  // getAttribute returns strings (or null); parse explicitly instead of
  // relying on implicit coercion inside the division.
  const total = Number.parseInt(pager.total, 10);
  const perPage = Number.parseInt(pager.perPage, 10);
  if (!Number.isInteger(total) || !Number.isInteger(perPage) || perPage <= 0) {
    throw new Error(`Unexpected pager attributes: data-total=${pager.total}, data-per-page=${pager.perPage}`);
  }

  return { total, perPage, pages: Math.ceil(total / perPage) };
}

/**
 * Extracts every review currently rendered on the page.
 *
 * Each field is looked up inside its own review element, so a review that
 * lacks a field yields an empty string instead of shifting the values of
 * the following reviews or throwing.
 *
 * @param {import('puppeteer').Page} page - Page that has the widget loaded.
 * @returns {Promise<Array<{name: string, rating: string, title: string, desc: string, date: string}>>}
 */
function extractReviews(page) {
  return page.evaluate((selector) => {
    const textOf = (root, css) => root.querySelector(css)?.textContent.trim() ?? '';

    return Array.from(document.querySelectorAll(selector.review), (review) => ({
      name: textOf(review, selector.name),
      rating: textOf(review, selector.rating),
      title: textOf(review, selector.title),
      desc: textOf(review, selector.desc),
      date: textOf(review, selector.date),
    }));
  }, selectors);
}

/**
 * Clicks the pager's "next" link and waits until different reviews are shown.
 *
 * Waiting only for a review selector is not enough: the current page's
 * reviews already match it, so the wait would resolve before the next page
 * has been rendered. Instead the combined text of the rendered reviews is
 * compared before and after the click.
 *
 * @param {import('puppeteer').Page} page - Page that has the widget loaded.
 * @returns {Promise<void>}
 */
async function goToNextPage(page) {
  const before = await page.evaluate(
    (selector) => Array.from(document.querySelectorAll(selector.review), (el) => el.textContent).join('\n'),
    selectors,
  );

  await page.click(selectors.next);

  await page.waitForFunction(
    (selector, previous) => {
      const current = Array.from(document.querySelectorAll(selector.review), (el) => el.textContent).join('\n');
      return current !== '' && current !== previous;
    },
    {},
    selectors,
    before,
  );
  await page.waitForSelector(selectors.name);
}

/**
 * Opens the product page, walks through every page of the review widget and
 * collects all reviews.
 *
 * @returns {Promise<Array<object>>} All reviews that were collected.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: false, args: ['--no-sandbox'] });

  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1280,
      height: 1024,
    });
    await page.goto(url, { waitUntil: 'networkidle0' });

    await page.waitForSelector(selectors.reviews);

    const { total, perPage, pages } = await readPagination(page);
    console.log('Total reviews:', total);
    console.log('Reviews per page:', perPage);
    console.log('Pages:', pages);

    const reviewsArr = [];
    for (let p = 1; p <= pages; p += 1) {
      console.log('Getting page:', p);
      reviewsArr.push(...await extractReviews(page));

      // Advance on every page except the last one.
      if (p < pages) {
        await goToNextPage(page);
      }
    }

    console.log('Got', reviewsArr.length, 'reviews');
    return reviewsArr;
  } finally {
    // Always release the browser process, even when scraping fails midway.
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Loading

0 comments on commit 52e016c

Please sign in to comment.