feat: Instagram scraper with GraphQL API integration - Automated followings list extraction via API interception - Profile scraping using GraphQL endpoint interception - DOM fallback for edge cases - Performance timing for all operations - Anti-bot measures and human-like behavior simulation

This commit is contained in:
2025-10-31 23:06:06 +05:45
parent ba2dcec881
commit 6f4f37bee5
8 changed files with 3474 additions and 0 deletions

146
utils.js Normal file
View File

@@ -0,0 +1,146 @@
/**
 * Pause for a randomly chosen duration.
 *
 * The wait is `minMs` plus a whole number of milliseconds in
 * `[0, maxMs - minMs]`, i.e. an integer in the inclusive range
 * `[minMs, maxMs]` when both bounds are integers and `minMs <= maxMs`.
 *
 * @param {number} [minMs=2000] - Shortest possible wait, in milliseconds.
 * @param {number} [maxMs=5000] - Longest possible wait, in milliseconds.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function randomSleep(minMs = 2000, maxMs = 5000) {
  // "+ 1" makes the upper bound reachable after flooring.
  const span = maxMs - minMs + 1;
  const waitMs = Math.floor(Math.random() * span) + minMs;
  return new Promise((resolve) => setTimeout(resolve, waitMs));
}
/**
 * Move the mouse through a series of random points to mimic a human user.
 *
 * Each move targets a random integer coordinate inside the viewport and is
 * followed by a short random pause (100-500 ms).
 *
 * @param {object} page - Puppeteer-style page exposing `viewport()`,
 *   `evaluate()` and `mouse.move()`.
 * @param {number} [steps=10] - Number of random mouse moves to perform.
 * @returns {Promise<void>} Resolves after the last move and pause.
 */
async function humanLikeMouseMovement(page, steps = 10) {
  // page.viewport() yields null when no viewport has been set (e.g. the
  // browser was launched with `defaultViewport: null`). Reading `.width` on
  // that would throw, so fall back to the window's actual inner size.
  const viewport = await page.viewport();
  const { width, height } =
    viewport ||
    (await page.evaluate(() => ({
      width: window.innerWidth,
      height: window.innerHeight,
    })));

  for (let i = 0; i < steps; i++) {
    const x = Math.floor(Math.random() * width);
    const y = Math.floor(Math.random() * height);
    // 5-14 intermediate mouse events so the pointer glides instead of jumping.
    await page.mouse.move(x, y, { steps: Math.floor(Math.random() * 10) + 5 });
    await randomSleep(100, 500);
  }
}
/**
 * Scroll the page down in several randomly sized steps.
 *
 * Every step scrolls by a whole number of pixels in [100, 399] and is
 * followed by a random pause of 800-1500 ms.
 *
 * @param {object} page - Page object exposing `evaluate(fn, ...args)`.
 * @param {number} [scrollCount=3] - How many scroll steps to perform.
 * @returns {Promise<void>} Resolves once the final pause has elapsed.
 */
async function randomScroll(page, scrollCount = 3) {
  // Executed in the page context; receives the pixel distance as its argument.
  const scrollDownBy = (pixels) => {
    window.scrollBy(0, pixels);
  };

  let remaining = scrollCount;
  while (remaining > 0) {
    const distance = 100 + Math.floor(Math.random() * 300);
    await page.evaluate(scrollDownBy, distance);
    await randomSleep(800, 1500);
    remaining -= 1;
  }
}
/**
 * Run a combined sequence of human-like interactions on a page.
 *
 * In order: random mouse movements, random scrolling, an optional hover over
 * a random visible element, then a final random pause of 500-1000 ms.
 *
 * @param {object} page - Puppeteer-style page exposing `evaluate()` and
 *   `mouse.move()`.
 * @param {object} [options]
 * @param {number} [options.mouseMovements=5] - Random mouse moves to perform;
 *   skipped when not greater than 0.
 * @param {number} [options.scrolls=2] - Random scroll steps to perform;
 *   skipped when not greater than 0.
 * @param {boolean} [options.randomClicks=false] - When true, hover the mouse
 *   over a random visible `div`, `span` or `p`. Despite the name, no click is
 *   ever issued, to avoid triggering page actions.
 * @returns {Promise<void>} Resolves after the final pause.
 */
async function simulateHumanBehavior(page, options = {}) {
  const { mouseMovements = 5, scrolls = 2, randomClicks = false } = options;

  if (mouseMovements > 0) {
    await humanLikeMouseMovement(page, mouseMovements);
  }

  if (scrolls > 0) {
    await randomScroll(page, scrolls);
  }

  if (randomClicks) {
    try {
      // Pick the hover target inside the page, returning only plain
      // coordinates because DOM nodes cannot cross the evaluate boundary.
      const target = await page.evaluate(() => {
        const visible = Array.from(
          document.querySelectorAll("div, span, p")
        ).filter((el) => {
          const box = el.getBoundingClientRect();
          return (
            box.width > 0 &&
            box.height > 0 &&
            box.bottom > 0 &&
            box.right > 0 &&
            box.top < window.innerHeight &&
            box.left < window.innerWidth
          );
        });
        if (visible.length === 0) {
          return null;
        }
        const chosen = visible[Math.floor(Math.random() * visible.length)];
        const rect = chosen.getBoundingClientRect();
        // Clamp the centre into the viewport for partially visible elements.
        const clamp = (value, max) => Math.min(Math.max(value, 0), max);
        return {
          x: clamp(rect.left + rect.width / 2, window.innerWidth - 1),
          y: clamp(rect.top + rect.height / 2, window.innerHeight - 1),
        };
      });

      // Just move to it; do not click, to avoid triggering actions.
      if (target) {
        await page.mouse.move(target.x, target.y, {
          steps: Math.floor(Math.random() * 10) + 5,
        });
      }
    } catch (err) {
      // Best-effort step: a failed hover must not abort the simulation, but
      // report it instead of hiding it.
      console.log(`Random hover skipped: ${(err && err.message) || err}`);
    }
  }

  await randomSleep(500, 1000);
}
/**
 * Run an async operation, retrying failures with capped exponential backoff
 * and jitter.
 *
 * @param {Function} fn - Operation to run; called with no arguments and
 *   awaited on every attempt.
 * @param {object} [options]
 * @param {number} [options.maxRetries=3] - Total number of attempts
 *   (including the first). Values below 1 are treated as 1 so `fn` always
 *   runs; fractional values are floored.
 * @param {number} [options.initialDelay=2000] - Base delay in milliseconds
 *   before the first retry; doubles on each subsequent retry.
 * @param {number} [options.maxDelay=30000] - Upper bound in milliseconds for
 *   any single wait, jitter included.
 * @param {Function} [options.shouldRetry] - Receives the thrown error and
 *   returns whether to retry. Defaults to always retrying.
 * @returns {Promise<*>} The value `fn` resolves with on its first success.
 * @throws Rethrows the error from `fn` unchanged when `shouldRetry` returns
 *   falsy or when the attempts are exhausted.
 */
async function withRetry(fn, options = {}) {
  const {
    maxRetries = 3,
    initialDelay = 2000,
    maxDelay = 30000,
    shouldRetry = () => true,
  } = options;

  // Guarantee at least one attempt, and an integer count so the
  // "last attempt" check below can never be skipped (which would otherwise
  // let the loop end and swallow the final error).
  const totalAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;

  for (let attempt = 0; attempt < totalAttempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      const isLastAttempt = attempt >= totalAttempts - 1;
      // shouldRetry is consulted first, even on the last attempt, so any
      // logging it performs still happens.
      if (!shouldRetry(error) || isLastAttempt) {
        throw error;
      }

      // Exponential backoff; with the defaults: 2s, 4s, 8s, 16s, 30s (capped).
      const exponentialDelay = Math.min(
        initialDelay * Math.pow(2, attempt),
        maxDelay
      );
      // Jitter of +/-20% to avoid a thundering herd. The cap is applied again
      // afterwards so the wait never exceeds maxDelay.
      const jittered = exponentialDelay * (0.8 + Math.random() * 0.4);
      const delay = Math.floor(Math.min(jittered, maxDelay));

      // The rejection value may be null/undefined or a non-Error; do not let
      // reading `.message` mask the retry with a TypeError.
      const reason = error == null ? error : error.message || error;
      console.log(
        `Retry attempt ${attempt + 1}/${totalAttempts} after ${delay}ms delay...`
      );
      console.log(`Error: ${reason}`);
      await randomSleep(delay, delay);
    }
  }
}
/**
 * Run a request through `withRetry` using a policy aimed at rate limiting
 * and transient failures.
 *
 * Policy: up to 5 attempts, 2000 ms initial delay, 60000 ms maximum delay.
 * An error is retried when its `status`/`statusCode` is 429 or at least 500,
 * or when its `code` is "ECONNRESET" or "ETIMEDOUT". Anything else (e.g.
 * other 4xx responses) is not retried.
 *
 * @param {object} page - Accepted for interface compatibility; not used here.
 * @param {Function} requestFn - Async operation handed to `withRetry`.
 * @param {string} [context=""] - Text appended to log messages to identify
 *   the request.
 * @returns {Promise<*>} Whatever `withRetry` resolves with.
 */
async function handleRateLimitedRequest(page, requestFn, context = "") {
  const transientNetworkCodes = ["ECONNRESET", "ETIMEDOUT"];

  // Decides whether a failure is worth another attempt, logging the reason.
  const isRetryable = (error) => {
    const rateLimited = error.status === 429 || error.statusCode === 429;
    if (rateLimited) {
      console.log(`Rate limited (429) ${context}. Backing off...`);
      return true;
    }

    const serverFailure = error.status >= 500 || error.statusCode >= 500;
    if (serverFailure) {
      const shownStatus = error.status || error.statusCode;
      console.log(`Server error (${shownStatus}) ${context}. Retrying...`);
      return true;
    }

    if (transientNetworkCodes.includes(error.code)) {
      console.log(`Network error (${error.code}) ${context}. Retrying...`);
      return true;
    }

    // Client errors (4xx other than 429) and unknown failures are final.
    return false;
  };

  const retryPolicy = {
    maxRetries: 5,
    initialDelay: 2000,
    maxDelay: 60000,
    shouldRetry: isRetryable,
  };
  return withRetry(requestFn, retryPolicy);
}
// Public API of this module (CommonJS exports): the random-delay helper, the
// human-behavior simulation helpers, and the retry/backoff helpers defined in
// this file.
module.exports = {
randomSleep,
humanLikeMouseMovement,
randomScroll,
simulateHumanBehavior,
withRetry,
handleRateLimitedRequest,
};