const {
  loginWithSession,
  extractSession,
  scrapeWorkflow,
  getFollowingsList,
  scrapeProfile,
  cronJobs,
} = require("./scraper.js");
const { randomSleep, simulateHumanBehavior } = require("./utils.js");
const fs = require("fs");
require("dotenv").config();

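// All configuration below is read from the environment via dotenv. A minimal
// .env sketch (illustrative values only; every entry is optional and falls
// back to the defaults used in the code; the PROXY format in particular is an
// assumption, since the value is passed straight through to scraper.js):
//
//   INSTAGRAM_USERNAME=your_username
//   INSTAGRAM_PASSWORD=your_password
//   TARGET_USERNAME=instagram
//   MAX_FOLLOWING=20
//   MAX_PROFILES=5
//   PROXY=http://user:pass@host:port
//   MODE=full
//   SCRAPE_INTERVAL=60
//   MAX_RUNS=5
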
// Full workflow: Login, browse, scrape followings and profiles
async function fullScrapingWorkflow() {
  console.log("Starting Instagram Full Scraping Workflow...\n");

  // Start total timer
  const totalStartTime = Date.now();

  const credentials = {
    username: process.env.INSTAGRAM_USERNAME || "your_username",
    password: process.env.INSTAGRAM_PASSWORD || "your_password",
  };

  const targetUsername = process.env.TARGET_USERNAME || "instagram";
  const maxFollowing = parseInt(process.env.MAX_FOLLOWING || "20", 10);
  const maxProfilesToScrape = parseInt(process.env.MAX_PROFILES || "5", 10);
  const proxy = process.env.PROXY || null;

  let browser, page;

  try {
    console.log("Configuration:");
    console.log(` Target: @${targetUsername}`);
    console.log(` Max following to fetch: ${maxFollowing}`);
    console.log(` Max profiles to scrape: ${maxProfilesToScrape}`);
    console.log(` Proxy: ${proxy || "None"}\n`);

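    // Note: the third argument to loginWithSession below is assumed to be a
    // "reuse saved session" flag -- the result exposes sessionReused, so a
    // truthy value here presumably lets the login skip the password flow when
    // valid cookies are already on disk.
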
    // Step 1: Login (with session reuse)
    console.log("Step 1: Logging in to Instagram...");
    const loginResult = await loginWithSession(credentials, proxy, true);
    browser = loginResult.browser;
    page = loginResult.page;

    if (loginResult.sessionReused) {
      console.log("Reused existing session!\n");
    } else {
      console.log("Fresh login successful!\n");
    }

    // Step 2: Extract and save session
    console.log("Step 2: Extracting session cookies...");
    const session = await extractSession(page);
    fs.writeFileSync("session_cookies.json", JSON.stringify(session, null, 2));
    console.log(`Session saved (${session.cookies.length} cookies)\n`);

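    // The session_cookies.json written above is the artifact a later run can
    // presumably feed back into loginWithSession (via its session-reuse flag)
    // to avoid logging in from scratch; the exact reload mechanism lives in
    // scraper.js and is not shown here.
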
    // Step 3: Simulate browsing before scraping
    console.log("Step 3: Simulating human browsing behavior...");
    await simulateHumanBehavior(page, { mouseMovements: 5, scrolls: 3 });
    await randomSleep(2000, 4000);
    console.log("Browsing simulation complete\n");

    // Step 4: Get followings list
    console.log(`👥 Step 4: Fetching following list for @${targetUsername}...`);
    const followingsStartTime = Date.now();

    const followingsData = await getFollowingsList(
      page,
      targetUsername,
      maxFollowing
    );

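    // followingsData is assumed (from how it is used below) to contain at
    // least { fullData: [...], usernames: [...] } -- fullData holds the raw
    // user objects written to disk, and usernames feeds the per-profile
    // scraping loop in Step 5.
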
    const followingsEndTime = Date.now();
    const followingsTime = (
      (followingsEndTime - followingsStartTime) /
      1000
    ).toFixed(2);

    console.log(
      `✓ Captured ${followingsData.fullData.length} followings in ${followingsTime}s\n`
    );

    // Save followings data
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const followingsFile = `followings_${targetUsername}_${timestamp}.json`;
    fs.writeFileSync(
      followingsFile,
      JSON.stringify(
        {
          targetUsername,
          scrapedAt: new Date().toISOString(),
          totalFollowings: followingsData.fullData.length,
          followings: followingsData.fullData,
        },
        null,
        2
      )
    );
    console.log(`Followings data saved to: ${followingsFile}\n`);

    // Step 5: Scrape individual profiles
    console.log(
      `📊 Step 5: Scraping ${maxProfilesToScrape} individual profiles...`
    );
    const profilesStartTime = Date.now();
    const profilesData = [];
    const usernamesToScrape = followingsData.usernames.slice(
      0,
      maxProfilesToScrape
    );

    for (let i = 0; i < usernamesToScrape.length; i++) {
      const username = usernamesToScrape[i];
      console.log(
        ` [${i + 1}/${usernamesToScrape.length}] Scraping @${username}...`
      );

      try {
        const profileData = await scrapeProfile(page, username);
        profilesData.push(profileData);
        console.log(` @${username}: ${profileData.followerCount} followers`);

        // Human-like delay between profiles
        await randomSleep(3000, 6000);

        // Take a longer break every 3 profiles
        if ((i + 1) % 3 === 0 && i < usernamesToScrape.length - 1) {
          console.log(" ⏸ Taking a human-like break...");
          await simulateHumanBehavior(page, { mouseMovements: 4, scrolls: 2 });
          await randomSleep(8000, 12000);
        }
      } catch (error) {
        console.log(` Failed to scrape @${username}: ${error.message}`);
      }
    }

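    // Rough pacing budget from the constants above: each profile costs a
    // 3-6 s randomSleep, and every third profile adds an 8-12 s break plus
    // extra simulated activity. For the default MAX_PROFILES=5 that is about
    // 15-30 s of sleeps plus one 8-12 s break (roughly 23-42 s of deliberate
    // delay) on top of page-load and scraping time.
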
    const profilesEndTime = Date.now();
    const profilesTime = ((profilesEndTime - profilesStartTime) / 1000).toFixed(
      2
    );

    console.log(
      `\n✓ Scraped ${profilesData.length} profiles in ${profilesTime}s\n`
    );

    // Step 6: Save profiles data
    console.log("Step 6: Saving profile data...");
    const profilesFile = `profiles_${targetUsername}_${timestamp}.json`;
    fs.writeFileSync(
      profilesFile,
      JSON.stringify(
        {
          targetUsername,
          scrapedAt: new Date().toISOString(),
          totalProfiles: profilesData.length,
          profiles: profilesData,
        },
        null,
        2
      )
    );
    console.log(`Profiles data saved to: ${profilesFile}\n`);

    // Calculate total time (keep the arithmetic on numbers; format only for display)
    const totalEndTime = Date.now();
    const totalElapsedSeconds = (totalEndTime - totalStartTime) / 1000;
    const totalTime = totalElapsedSeconds.toFixed(2);
    const totalMinutes = Math.floor(totalElapsedSeconds / 60);
    const totalSeconds = (totalElapsedSeconds % 60).toFixed(2);

    // Step 7: Summary
    console.log("=".repeat(60));
    console.log("📊 SCRAPING SUMMARY");
    console.log("=".repeat(60));
    console.log(`✓ Logged in successfully`);
    console.log(`✓ Session cookies saved`);
    console.log(
      `✓ ${followingsData.fullData.length} followings captured in ${followingsTime}s`
    );
    console.log(
      `✓ ${profilesData.length} profiles scraped in ${profilesTime}s`
    );
    console.log(`\n📁 Files created:`);
    console.log(` • ${followingsFile}`);
    console.log(` • ${profilesFile}`);
    console.log(` • session_cookies.json`);
    console.log(
      `\n⏱️ Total execution time: ${totalMinutes}m ${totalSeconds}s`
    );
    console.log("=".repeat(60) + "\n");

    return {
      success: true,
      followingsCount: followingsData.fullData.length,
      profilesCount: profilesData.length,
      followingsData: followingsData.fullData,
      profilesData,
      session,
      timings: {
        followingsTime: parseFloat(followingsTime),
        profilesTime: parseFloat(profilesTime),
        totalTime: parseFloat(totalTime),
      },
    };
  } catch (error) {
    console.error("\nScraping workflow failed:");
    console.error(error.message);
    console.error(error.stack);
    throw error;
  } finally {
    if (browser) {
      console.log("Closing browser...");
      await browser.close();
      console.log("Browser closed\n");
    }
  }
}

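// A minimal sketch of driving the full workflow from another script (the file
// name in the require is an assumption -- adjust it to wherever this module
// actually lives):
//
//   const { fullScrapingWorkflow } = require("./examples.js");
//   fullScrapingWorkflow()
//     .then((r) => console.log(r.followingsCount, r.timings.totalTime))
//     .catch(console.error);
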
// Alternative: Use the built-in scrapeWorkflow function
async function simpleWorkflow() {
  console.log("Starting Simple Scraping Workflow (using scrapeWorkflow)...\n");

  const credentials = {
    username: process.env.INSTAGRAM_USERNAME || "your_username",
    password: process.env.INSTAGRAM_PASSWORD || "your_password",
  };

  const targetUsername = process.env.TARGET_USERNAME || "instagram";
  const maxFollowing = parseInt(process.env.MAX_FOLLOWING || "20", 10);
  const proxy = process.env.PROXY || null;

  try {
    console.log(`Target: @${targetUsername}`);
    console.log(`Max following to scrape: ${maxFollowing}`);
    console.log(`Using proxy: ${proxy || "None"}\n`);

    const result = await scrapeWorkflow(
      credentials,
      targetUsername,
      proxy,
      maxFollowing
    );

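    // The result object is assumed (from the fields read below) to expose at
    // least scrapedProfiles (a count) and followingsFullData (an array of
    // user objects); anything else scrapeWorkflow returns is passed through
    // untouched to the caller.
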
console.log("\nScraping completed successfully!");
|
|
console.log(`Total profiles scraped: ${result.scrapedProfiles}`);
|
|
console.log(
|
|
`Full following data captured: ${result.followingsFullData.length} users`
|
|
);
|
|
|
|
// Save the data
|
|
if (result.followingsFullData.length > 0) {
|
|
const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
const filename = `scraped_data_${targetUsername}_${timestamp}.json`;
|
|
|
|
fs.writeFileSync(
|
|
filename,
|
|
JSON.stringify(
|
|
{
|
|
targetUsername,
|
|
scrapedAt: new Date().toISOString(),
|
|
totalUsers: result.followingsFullData.length,
|
|
data: result.followingsFullData,
|
|
},
|
|
null,
|
|
2
|
|
)
|
|
);
|
|
|
|
console.log(`Data saved to: ${filename}`);
|
|
}
|
|
|
|
return result;
|
|
} catch (error) {
|
|
console.error("\nScraping failed:");
|
|
console.error(error.message);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
// Scheduled scraping with cron
async function scheduledScraping() {
  console.log("Starting Scheduled Scraping...\n");

  const credentials = {
    username: process.env.INSTAGRAM_USERNAME || "your_username",
    password: process.env.INSTAGRAM_PASSWORD || "your_password",
  };

  const targetUsername = process.env.TARGET_USERNAME || "instagram";
  const intervalMinutes = parseInt(process.env.SCRAPE_INTERVAL || "60", 10);
  const maxRuns = parseInt(process.env.MAX_RUNS || "5", 10);

  console.log(
    `Will scrape @${targetUsername} every ${intervalMinutes} minutes`
  );
  console.log(`Maximum runs: ${maxRuns}\n`);

  let runCount = 0;

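  // cronJobs is assumed to invoke the callback on the given interval and to
  // resolve with a stop handle (hence stopCron, kept only as a reference
  // here); per the inline comment below, the interval argument is expressed
  // in seconds, so minutes are converted before being passed in.
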
  const stopCron = await cronJobs(
    async () => {
      runCount++;
      console.log(`\n${"=".repeat(60)}`);
      console.log(
        `📅 Scheduled Run #${runCount} - ${new Date().toLocaleString()}`
      );
      console.log("=".repeat(60));

      try {
        await simpleWorkflow();
      } catch (error) {
        console.error(`Run #${runCount} failed:`, error.message);
      }

      if (runCount >= maxRuns) {
        console.log(`\nCompleted ${maxRuns} scheduled runs. Stopping...`);
        process.exit(0);
      }
    },
    intervalMinutes * 60, // Convert to seconds
    maxRuns
  );

  console.log("Cron job started. Press Ctrl+C to stop.\n");
}

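// Running this file directly picks a workflow from the MODE environment
// variable. Illustrative invocations (the file name is an assumption --
// substitute the actual script name):
//
//   node examples.js                  # full workflow (default)
//   MODE=simple node examples.js      # single scrapeWorkflow pass
//   MODE=scheduled node examples.js   # repeat on an interval via cronJobs
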
// Main entry point
if (require.main === module) {
  const mode = process.env.MODE || "full"; // full, simple, or scheduled

  console.log(`Mode: ${mode}\n`);

  let workflow;
  if (mode === "simple") {
    workflow = simpleWorkflow();
  } else if (mode === "scheduled") {
    workflow = scheduledScraping();
  } else {
    workflow = fullScrapingWorkflow();
  }

  workflow
    .then(() => {
      // In scheduled mode the promise presumably resolves as soon as the cron
      // job is registered (scheduledScraping awaits cronJobs and returns), so
      // exiting here would kill the pending runs; its callback terminates the
      // process itself after maxRuns.
      if (mode !== "scheduled") {
        console.log("All done!");
        process.exit(0);
      }
    })
    .catch((err) => {
      console.error("\nFatal error:", err);
      process.exit(1);
    });
}

module.exports = {
  fullScrapingWorkflow,
  simpleWorkflow,
  scheduledScraping,
};