puppeteer scripts
This commit is contained in:
73
Workflow/MyDealz Kommentare/puppeteer_extract_comments.js
Normal file
73
Workflow/MyDealz Kommentare/puppeteer_extract_comments.js
Normal file
@@ -0,0 +1,73 @@
|
||||
/**
 * Expands collapsed reply threads by clicking the "more replies" button and
 * waiting until additional comment bodies show up in the DOM.
 *
 * One round consists of:
 *   1. counting the elements that match `selector_comment_body`,
 *   2. waiting up to 2 s for a visible element matching `selector_replies`,
 *   3. clicking the first such element from inside the page,
 *   4. waiting up to 10 s until more comment bodies exist than were counted,
 *   5. sleeping a random 0.5-1 s.
 * If any step of a round throws (for example a timeout because no button is
 * present), a message is logged and the function stops without rethrowing.
 *
 * @param {object} page - Page object; `evaluate`, `waitForSelector` and
 *   `waitForFunction` are called on it.
 * @param {string} selector_comment_body - Selector matching a single comment body.
 * @param {string} selector_replies - Selector of the button that loads more replies.
 * @param {number} [max_rounds=1] - Maximum number of click rounds. The default
 *   keeps the previous behaviour of a single round.
 * @returns {Promise<void>}
 */
async function click_replies(page, selector_comment_body, selector_replies, max_rounds = 1) {
  // The counter is block-scoped on purpose: an undeclared `i` would leak into
  // the global scope and overwrite the loop counter of any caller that also
  // uses `i` (and it throws a ReferenceError in strict mode).
  for (let round = 0; round < max_rounds; round++) {
    try {
      // Remember how many comments exist before the click so growth can be detected.
      const comment_cnt = await page.evaluate(
        (selector) => document.querySelectorAll(selector).length,
        selector_comment_body
      );

      await page.waitForSelector(selector_replies, { visible: true, timeout: 2000 });

      // Click inside the page context instead of going through an element handle.
      await page.evaluate(
        (selector) => document.querySelector(selector).click(),
        selector_replies
      );

      // waitForFunction(pageFunction, options, ...args): the trailing values are
      // passed to the in-page predicate as (selector, originalCount).
      await page.waitForFunction(
        (selector, originalCount) =>
          document.querySelectorAll(selector).length > originalCount,
        { timeout: 10000 },
        selector_comment_body,
        comment_cnt
      );

      // Short human-like pause (0.5 to 1 second) to reduce the risk of bot detection.
      await new Promise((resolve) => setTimeout(resolve, 500 + Math.random() * 500));
    } catch (error) {
      // Best effort: a failure here is treated as "nothing (more) to expand".
      console.log("Keine Antworten vorhanden");
      break;
    }
  }
}
|
||||
|
||||
/**
 * Reads the text of every comment currently in the page and appends it to the
 * comments gathered so far.
 *
 * Inside the page, all elements matching `selector_comment_body` are looked
 * up and their trimmed `textContent` is collected. The result is concatenated
 * onto `data` (the passed-in array itself is not modified). Before returning,
 * the function sleeps a random 0.5-1 s.
 *
 * @param {object} page - Page object; only `evaluate` is called on it.
 * @param {Array<string>} data - Comments collected so far.
 * @param {string} selector_comment_body - Selector matching a single comment body.
 * @returns {Promise<Array<string>>} New array: previous entries followed by
 *   the comment texts found on the current page.
 */
async function extract_comments(page, data, selector_comment_body) {
  // Runs in the browser context; only the selector string crosses the boundary.
  const page_comments = await page.evaluate((selector) => {
    const texts = [];
    for (const node of document.querySelectorAll(selector)) {
      texts.push(node.textContent.trim());
    }
    return texts;
  }, selector_comment_body);

  const combined = data.concat(page_comments);

  // Short human-like pause (0.5 to 1 second) to reduce the risk of bot detection.
  const pause_ms = 500 + Math.random() * 500;
  await new Promise((resolve) => setTimeout(resolve, pause_ms));

  return combined;
}
|
||||
|
||||
const { cookies } = $input.first().json;
|
||||
deal_url = $input.last().json['url'];
|
||||
|
||||
const selector_replies = "button[data-t='moreReplies']";
|
||||
const selector_comment_body = ".comment-body";
|
||||
const sel_next_page = "::-p-aria(Nächste Seite)";
|
||||
var data = [];
|
||||
|
||||
// Restore cookies
|
||||
await $page.setCookie(...cookies);
|
||||
|
||||
// Navigate to authenticated page
|
||||
await $page.goto(deal_url);
|
||||
|
||||
try {
|
||||
|
||||
for (i = 0; i < 10; i++) {
|
||||
//await click_replies($page, selector_comment_body, selector_replies);
|
||||
data = await extract_comments($page, data, selector_comment_body);
|
||||
|
||||
await Promise.all([
|
||||
await $page.waitForSelector(sel_next_page),
|
||||
await $page.click(sel_next_page),
|
||||
]);
|
||||
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
return [{ data }];
|
||||
}
|
||||
|
||||
return [{ data }];
|
||||
Reference in New Issue
Block a user