feat: Instagram scraper with GraphQL API integration

- Automated followings list extraction via API interception
- Profile scraping using GraphQL endpoint interception
- DOM fallback for edge cases
- Performance timing for all operations
- Anti-bot measures and human-like behavior simulation
This commit is contained in:
146
utils.js
Normal file
146
utils.js
Normal file
@@ -0,0 +1,146 @@
|
||||
/**
 * Pause for a randomly chosen number of milliseconds.
 *
 * For integer bounds the delay is an integer picked from the inclusive
 * range [minMs, maxMs].
 *
 * @param {number} [minMs=2000] - Shortest possible pause, in milliseconds.
 * @param {number} [maxMs=5000] - Longest possible pause, in milliseconds.
 * @returns {Promise<void>} Resolves once the chosen delay has elapsed.
 */
function randomSleep(minMs = 2000, maxMs = 5000) {
  // Number of distinct integer delays available between the two bounds.
  const span = maxMs - minMs + 1;
  const offset = Math.floor(Math.random() * span);
  const waitMs = offset + minMs;

  return new Promise((resolve) => {
    setTimeout(resolve, waitMs);
  });
}
|
||||
|
||||
/**
 * Move the mouse through a series of random points to mimic a human user.
 *
 * @param {object} page - Page object exposing `viewport()`, `evaluate()` and
 *   `mouse.move()` (Puppeteer-style API).
 * @param {number} [steps=10] - How many random positions to visit.
 * @returns {Promise<void>} Resolves after the last move and pause.
 */
async function humanLikeMouseMovement(page, steps = 10) {
  // `page.viewport()` yields null when no fixed viewport is configured
  // (e.g. the browser was launched with `defaultViewport: null`). Fall back
  // to the window's actual inner size instead of crashing on `null.width`.
  let viewport = await page.viewport();
  if (!viewport) {
    viewport = await page.evaluate(() => ({
      width: window.innerWidth,
      height: window.innerHeight,
    }));
  }
  const { width, height } = viewport;

  for (let i = 0; i < steps; i++) {
    const x = Math.floor(Math.random() * width);
    const y = Math.floor(Math.random() * height);

    // Spread each move over 5-14 intermediate points so the pointer glides
    // instead of teleporting.
    await page.mouse.move(x, y, { steps: Math.floor(Math.random() * 10) + 5 });
    await randomSleep(100, 500);
  }
}
|
||||
|
||||
/**
 * Scroll the page downward several times by random distances, pausing
 * between scrolls so the activity looks human.
 *
 * @param {object} page - Page object exposing `evaluate()`.
 * @param {number} [scrollCount=3] - Number of scroll steps to perform.
 * @returns {Promise<void>} Resolves after the final pause.
 */
async function randomScroll(page, scrollCount = 3) {
  // Runs inside the page context; receives the distance as its argument.
  const scrollPageBy = (deltaY) => {
    window.scrollBy(0, deltaY);
  };

  for (let done = 0; done < scrollCount; done += 1) {
    // Each step scrolls between 100 and 399 pixels.
    const distance = 100 + Math.floor(Math.random() * 300);

    await page.evaluate(scrollPageBy, distance);
    await randomSleep(800, 1500);
  }
}
|
||||
|
||||
/**
 * Run a short sequence of human-looking interactions on a page: random mouse
 * movements, random scrolling, an optional hover over a random element, and
 * a final short pause.
 *
 * @param {object} page - Page object exposing `evaluate()` and `mouse.move()`
 *   (Puppeteer-style API).
 * @param {object} [options]
 * @param {number} [options.mouseMovements=5] - Random mouse moves to perform;
 *   0 or less skips this phase.
 * @param {number} [options.scrolls=2] - Random scroll steps to perform; 0 or
 *   less skips this phase.
 * @param {boolean} [options.randomClicks=false] - When true, hover the mouse
 *   over a random on-screen `div`/`span`/`p`. Despite the name no click is
 *   issued, to avoid triggering page actions.
 * @returns {Promise<void>} Resolves after the closing pause.
 */
async function simulateHumanBehavior(page, options = {}) {
  const { mouseMovements = 5, scrolls = 2, randomClicks = false } = options;

  // Random mouse movements
  if (mouseMovements > 0) {
    await humanLikeMouseMovement(page, mouseMovements);
  }

  // Random scrolling
  if (scrolls > 0) {
    await randomScroll(page, scrolls);
  }

  // Optional: hover over a random non-interactive element.
  if (randomClicks) {
    try {
      // Runs in the page: sample a few random elements and return the centre
      // of the first one that is rendered and inside the visible viewport.
      const target = await page.evaluate(() => {
        const elements = document.querySelectorAll("div, span, p");
        const maxPicks = 10;

        for (let pick = 0; pick < maxPicks && elements.length > 0; pick++) {
          const element = elements[Math.floor(Math.random() * elements.length)];
          const rect = element.getBoundingClientRect();
          const x = rect.left + rect.width / 2;
          const y = rect.top + rect.height / 2;
          const isRendered = rect.width > 0 && rect.height > 0;
          const isOnScreen =
            x >= 0 && y >= 0 && x < window.innerWidth && y < window.innerHeight;

          if (isRendered && isOnScreen) {
            return { x, y };
          }
        }

        return null;
      });

      // Just move to it, don't actually click to avoid triggering actions.
      if (target) {
        await page.mouse.move(target.x, target.y, {
          steps: Math.floor(Math.random() * 10) + 5,
        });
      }
    } catch (err) {
      // Best-effort only: a failed hover must never abort the caller, but
      // leave a trace instead of swallowing the error silently.
      console.log(`Skipping random hover: ${(err && err.message) || err}`);
    }
  }

  await randomSleep(500, 1000);
}
|
||||
|
||||
/**
 * Call an async function, retrying on failure with exponential backoff and
 * jitter.
 *
 * @param {Function} fn - Zero-argument function to invoke; its result is
 *   awaited.
 * @param {object} [options]
 * @param {number} [options.maxRetries=3] - Total number of attempts,
 *   including the first one. Values below 1, fractional values and NaN are
 *   normalised to a whole number of at least 1 so `fn` always runs.
 * @param {number} [options.initialDelay=2000] - Backoff before the second
 *   attempt, in milliseconds; doubles on every further attempt.
 * @param {number} [options.maxDelay=30000] - Cap applied to the exponential
 *   delay before jitter. Jitter is added afterwards, so the actual wait can
 *   exceed this by up to 20%.
 * @param {Function} [options.shouldRetry] - Receives the thrown error;
 *   return false to rethrow immediately. Defaults to retrying every error.
 * @returns {Promise<*>} The value `fn` resolves with.
 * @throws Rethrows the last error from `fn` once attempts are exhausted or
 *   `shouldRetry` declines.
 */
async function withRetry(fn, options = {}) {
  const {
    maxRetries = 3,
    initialDelay = 2000,
    maxDelay = 30000,
    shouldRetry = (error) => true,
  } = options;

  // Always make at least one attempt, and use a whole number so the
  // last-attempt check below is guaranteed to match. Without this,
  // `maxRetries: 0` silently resolved undefined without ever calling `fn`.
  const totalAttempts = Math.max(1, Math.floor(maxRetries) || 0);

  for (let attempt = 0; attempt < totalAttempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      const isLastAttempt = attempt === totalAttempts - 1;

      // Check if we should retry this error
      if (!shouldRetry(error) || isLastAttempt) {
        throw error;
      }

      // Calculate exponential backoff delay: 2s, 4s, 8s, 16s, 30s (capped)
      const exponentialDelay = Math.min(
        initialDelay * Math.pow(2, attempt),
        maxDelay
      );

      // Add jitter (randomize ±20%) to avoid thundering herd
      const jitter = exponentialDelay * (0.8 + Math.random() * 0.4);
      const delay = Math.floor(jitter);

      console.log(
        `Retry attempt ${attempt + 1}/${totalAttempts} after ${delay}ms delay...`
      );
      // Guard the property access: anything can be thrown, including null.
      console.log(`Error: ${(error && error.message) || error}`);

      // Identical bounds make randomSleep wait exactly `delay` milliseconds.
      await randomSleep(delay, delay);
    }
  }
}
|
||||
|
||||
/**
 * Run a request through `withRetry` using a policy tuned for rate limiting:
 * up to 5 attempts, starting at a 2s backoff and capped at 60s.
 *
 * Retries happen only for HTTP 429, HTTP 5xx, and the network error codes
 * ECONNRESET / ETIMEDOUT; every other error is rethrown immediately.
 *
 * @param {object} page - Accepted for call-site symmetry; not used here.
 * @param {Function} requestFn - Zero-argument function performing the request.
 * @param {string} [context=""] - Label inserted into the log messages.
 * @returns {Promise<*>} Whatever `withRetry` resolves with.
 */
async function handleRateLimitedRequest(page, requestFn, context = "") {
  const transientNetworkCodes = ["ECONNRESET", "ETIMEDOUT"];

  // Decides, per error, whether another attempt is worthwhile.
  const isRetryable = (error) => {
    const rateLimited = error.status === 429 || error.statusCode === 429;
    if (rateLimited) {
      console.log(`Rate limited (429) ${context}. Backing off...`);
      return true;
    }

    const serverFailure = error.status >= 500 || error.statusCode >= 500;
    if (serverFailure) {
      const reportedStatus = error.status || error.statusCode;
      console.log(`Server error (${reportedStatus}) ${context}. Retrying...`);
      return true;
    }

    if (transientNetworkCodes.includes(error.code)) {
      console.log(`Network error (${error.code}) ${context}. Retrying...`);
      return true;
    }

    // Remaining cases (e.g. 4xx other than 429) are not retried.
    return false;
  };

  const retryPolicy = {
    maxRetries: 5,
    initialDelay: 2000,
    maxDelay: 60000,
    shouldRetry: isRetryable,
  };

  return withRetry(requestFn, retryPolicy);
}
|
||||
|
||||
module.exports = {
|
||||
randomSleep,
|
||||
humanLikeMouseMovement,
|
||||
randomScroll,
|
||||
simulateHumanBehavior,
|
||||
withRetry,
|
||||
handleRateLimitedRequest,
|
||||
};
|
||||
Reference in New Issue
Block a user