const {
  loginWithSession,
  extractSession,
  scrapeWorkflow,
  getFollowingsList,
  scrapeProfile,
  cronJobs,
} = require("./scraper.js");
const { randomSleep, simulateHumanBehavior } = require("./utils.js");
const fs = require("fs");
require("dotenv").config();
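// Configuration is read from environment variables (typically via a .env file
// next to this script). A minimal example .env -- values are placeholders,
// not real credentials; adjust to your setup:
//
//   INSTAGRAM_USERNAME=your_username
//   INSTAGRAM_PASSWORD=your_password
//   TARGET_USERNAME=instagram
//   MAX_FOLLOWING=20
//   MAX_PROFILES=5
//   PROXY=                (optional; format expected by scraper.js)
//   MODE=full             (full | simple | scheduled)
//   SCRAPE_INTERVAL=60    (minutes, scheduled mode only)
//   MAX_RUNS=5            (scheduled mode only)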
// Full workflow: Login, browse, scrape followings and profiles
async function fullScrapingWorkflow() {
  console.log("Starting Instagram Full Scraping Workflow...\n");

  // Start total timer
  const totalStartTime = Date.now();

  const credentials = {
    username: process.env.INSTAGRAM_USERNAME || "your_username",
    password: process.env.INSTAGRAM_PASSWORD || "your_password",
  };
  const targetUsername = process.env.TARGET_USERNAME || "instagram";
  const maxFollowing = parseInt(process.env.MAX_FOLLOWING || "20", 10);
  const maxProfilesToScrape = parseInt(process.env.MAX_PROFILES || "5", 10);
  const proxy = process.env.PROXY || null;

  let browser, page;

  try {
    console.log("Configuration:");
    console.log(`  Target: @${targetUsername}`);
    console.log(`  Max following to fetch: ${maxFollowing}`);
    console.log(`  Max profiles to scrape: ${maxProfilesToScrape}`);
    console.log(`  Proxy: ${proxy || "None"}\n`);

    // Step 1: Login (with session reuse)
    console.log("Step 1: Logging in to Instagram...");
    const loginResult = await loginWithSession(credentials, proxy, true);
    browser = loginResult.browser;
    page = loginResult.page;

    if (loginResult.sessionReused) {
      console.log("Reused existing session!\n");
    } else {
      console.log("Fresh login successful!\n");
    }

    // Step 2: Extract and save session
    console.log("Step 2: Extracting session cookies...");
    const session = await extractSession(page);
    fs.writeFileSync("session_cookies.json", JSON.stringify(session, null, 2));
    console.log(`Session saved (${session.cookies.length} cookies)\n`);

    // Step 3: Simulate browsing before scraping
    console.log("Step 3: Simulating human browsing behavior...");
    await simulateHumanBehavior(page, { mouseMovements: 5, scrolls: 3 });
    await randomSleep(2000, 4000);
    console.log("Browsing simulation complete\n");

    // Step 4: Get followings list
    console.log(`šŸ‘„ Step 4: Fetching following list for @${targetUsername}...`);
    const followingsStartTime = Date.now();
    const followingsData = await getFollowingsList(
      page,
      targetUsername,
      maxFollowing
    );
    const followingsEndTime = Date.now();
    const followingsTime = (
      (followingsEndTime - followingsStartTime) / 1000
    ).toFixed(2);
    console.log(
      `āœ“ Captured ${followingsData.fullData.length} followings in ${followingsTime}s\n`
    );

    // Save followings data
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const followingsFile = `followings_${targetUsername}_${timestamp}.json`;
    fs.writeFileSync(
      followingsFile,
      JSON.stringify(
        {
          targetUsername,
          scrapedAt: new Date().toISOString(),
          totalFollowings: followingsData.fullData.length,
          followings: followingsData.fullData,
        },
        null,
        2
      )
    );
    console.log(`Followings data saved to: ${followingsFile}\n`);

    // Step 5: Scrape individual profiles
    console.log(
      `šŸ“Š Step 5: Scraping ${maxProfilesToScrape} individual profiles...`
    );
    const profilesStartTime = Date.now();
    const profilesData = [];
    const usernamesToScrape = followingsData.usernames.slice(
      0,
      maxProfilesToScrape
    );

    for (let i = 0; i < usernamesToScrape.length; i++) {
      const username = usernamesToScrape[i];
      console.log(
        `  [${i + 1}/${usernamesToScrape.length}] Scraping @${username}...`
      );

      try {
        const profileData = await scrapeProfile(page, username);
        profilesData.push(profileData);
        console.log(`  @${username}: ${profileData.followerCount} followers`);

        // Human-like delay between profiles
        await randomSleep(3000, 6000);

        // Take a longer break every 3 profiles
        if ((i + 1) % 3 === 0 && i < usernamesToScrape.length - 1) {
          console.log("  āø Taking a human-like break...");
          await simulateHumanBehavior(page, { mouseMovements: 4, scrolls: 2 });
          await randomSleep(8000, 12000);
        }
      } catch (error) {
        console.log(`  Failed to scrape @${username}: ${error.message}`);
      }
    }

    const profilesEndTime = Date.now();
    const profilesTime = (
      (profilesEndTime - profilesStartTime) / 1000
    ).toFixed(2);
    console.log(
      `\nāœ“ Scraped ${profilesData.length} profiles in ${profilesTime}s\n`
    );

    // Step 6: Save profiles data
    console.log("Step 6: Saving profile data...");
    const profilesFile = `profiles_${targetUsername}_${timestamp}.json`;
    fs.writeFileSync(
      profilesFile,
      JSON.stringify(
        {
          targetUsername,
          scrapedAt: new Date().toISOString(),
          totalProfiles: profilesData.length,
          profiles: profilesData,
        },
        null,
        2
      )
    );
    console.log(`Profiles data saved to: ${profilesFile}\n`);

    // Calculate total time
    const totalEndTime = Date.now();
    const totalTime = ((totalEndTime - totalStartTime) / 1000).toFixed(2);
    const totalMinutes = Math.floor(totalTime / 60);
    const totalSeconds = (totalTime % 60).toFixed(2);

    // Step 7: Summary
    console.log("=".repeat(60));
    console.log("šŸ“Š SCRAPING SUMMARY");
    console.log("=".repeat(60));
    console.log(`āœ“ Logged in successfully`);
    console.log(`āœ“ Session cookies saved`);
    console.log(
      `āœ“ ${followingsData.fullData.length} followings captured in ${followingsTime}s`
    );
    console.log(
      `āœ“ ${profilesData.length} profiles scraped in ${profilesTime}s`
    );
    console.log(`\nšŸ“ Files created:`);
    console.log(`  • ${followingsFile}`);
    console.log(`  • ${profilesFile}`);
    console.log(`  • session_cookies.json`);
    console.log(
      `\nā±ļø Total execution time: ${totalMinutes}m ${totalSeconds}s`
    );
    console.log("=".repeat(60) + "\n");

    return {
      success: true,
      followingsCount: followingsData.fullData.length,
      profilesCount: profilesData.length,
      followingsData: followingsData.fullData,
      profilesData,
      session,
      timings: {
        followingsTime: parseFloat(followingsTime),
        profilesTime: parseFloat(profilesTime),
        totalTime: parseFloat(totalTime),
      },
    };
  } catch (error) {
    console.error("\nScraping workflow failed:");
    console.error(error.message);
    console.error(error.stack);
    throw error;
  } finally {
    if (browser) {
      console.log("Closing browser...");
      await browser.close();
      console.log("Browser closed\n");
    }
  }
}

// Alternative: Use the built-in scrapeWorkflow function
async function simpleWorkflow() {
  console.log("Starting Simple Scraping Workflow (using scrapeWorkflow)...\n");

  const credentials = {
    username: process.env.INSTAGRAM_USERNAME || "your_username",
    password: process.env.INSTAGRAM_PASSWORD || "your_password",
  };
  const targetUsername = process.env.TARGET_USERNAME || "instagram";
  const maxFollowing = parseInt(process.env.MAX_FOLLOWING || "20", 10);
  const proxy = process.env.PROXY || null;

  try {
    console.log(`Target: @${targetUsername}`);
    console.log(`Max following to scrape: ${maxFollowing}`);
    console.log(`Using proxy: ${proxy || "None"}\n`);

    const result = await scrapeWorkflow(
      credentials,
      targetUsername,
      proxy,
      maxFollowing
    );

    console.log("\nScraping completed successfully!");
    console.log(`Total profiles scraped: ${result.scrapedProfiles}`);
    console.log(
      `Full following data captured: ${result.followingsFullData.length} users`
    );

    // Save the data
    if (result.followingsFullData.length > 0) {
      const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
      const filename = `scraped_data_${targetUsername}_${timestamp}.json`;
      fs.writeFileSync(
        filename,
        JSON.stringify(
          {
            targetUsername,
            scrapedAt: new Date().toISOString(),
            totalUsers: result.followingsFullData.length,
            data: result.followingsFullData,
          },
          null,
          2
        )
      );
      console.log(`Data saved to: ${filename}`);
    }

    return result;
  } catch (error) {
    console.error("\nScraping failed:");
    console.error(error.message);
    throw error;
  }
}

// Scheduled scraping with cron
async function scheduledScraping() {
  console.log("Starting Scheduled Scraping...\n");

  const credentials = {
    username: process.env.INSTAGRAM_USERNAME || "your_username",
    password: process.env.INSTAGRAM_PASSWORD || "your_password",
  };
  const targetUsername = process.env.TARGET_USERNAME || "instagram";
  const intervalMinutes = parseInt(process.env.SCRAPE_INTERVAL || "60", 10);
  const maxRuns = parseInt(process.env.MAX_RUNS || "5", 10);

  console.log(
    `Will scrape @${targetUsername} every ${intervalMinutes} minutes`
  );
  console.log(`Maximum runs: ${maxRuns}\n`);

  let runCount = 0;

  const stopCron = await cronJobs(
    async () => {
      runCount++;
      console.log(`\n${"=".repeat(60)}`);
      console.log(
        `šŸ“… Scheduled Run #${runCount} - ${new Date().toLocaleString()}`
      );
      console.log("=".repeat(60));

      try {
        await simpleWorkflow();
      } catch (error) {
        console.error(`Run #${runCount} failed:`, error.message);
      }

      if (runCount >= maxRuns) {
        console.log(`\nCompleted ${maxRuns} scheduled runs. Stopping...`);
        process.exit(0);
      }
    },
    intervalMinutes * 60, // Convert to seconds
    maxRuns
  );

  console.log("Cron job started. Press Ctrl+C to stop.\n");
}

// Main entry point
if (require.main === module) {
  const mode = process.env.MODE || "full"; // full, simple, or scheduled

  console.log(`Mode: ${mode}\n`);

  let workflow;
  if (mode === "simple") {
    workflow = simpleWorkflow();
  } else if (mode === "scheduled") {
    workflow = scheduledScraping();
  } else {
    workflow = fullScrapingWorkflow();
  }

  workflow
    .then(() => {
      if (mode === "scheduled") {
        // The scheduled workflow resolves as soon as the cron job is
        // registered; keep the process alive so the scheduled runs can fire.
        return;
      }
      console.log("All done!");
      process.exit(0);
    })
    .catch((err) => {
      console.error("\nFatal error:", err);
      process.exit(1);
    });
}

module.exports = {
  fullScrapingWorkflow,
  simpleWorkflow,
  scheduledScraping,
};
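// Example invocations (a sketch; the entry-point filename "index.js" is an
// assumption -- adjust to however this file is saved):
//
//   MODE=full node index.js
//   MODE=simple TARGET_USERNAME=instagram MAX_FOLLOWING=20 node index.js
//   MODE=scheduled SCRAPE_INTERVAL=30 MAX_RUNS=3 node index.js
//
// The workflows can also be required from another script:
//
//   const { simpleWorkflow } = require("./index.js");
//   simpleWorkflow().then((result) => console.log(result.scrapedProfiles));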