We can retrieve all the links on a web page by using Selenium’s findElements() method with the tag name "a" and then extract each URL using the getAttribute("href") method of the WebElement.
Once the URLs are identified, we can check for broken links by creating an HttpURLConnection for each URL, sending an HTTP request, and capturing the response status code.
If the returned status code is greater than or equal to 400, the link can be considered broken, because HTTP status codes in the 4xx range indicate client-side errors, while those in the 5xx range indicate server-side errors. Below is a script that finds broken links with Selenium using Java:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import org.testng.annotations.Test;
public class BrokenLinkChecker {
WebDriver driver = null;
@Test
public void brokenLinkValidator() {
// Set path to your chromedriver
System.setProperty(“webdriver.chrome.driver”, “path/to/chromedriver”);
driver = new ChromeDriver();
driver.get(“https://google.com”); // Replace with your target URL
// Get all <a> elements
List<WebElement> links = driver.findElements(By.tagName(“a”));
System.out.println(“Total links found in the web page: ” + links.size());
for (WebElement link : links) {
String url = link.getAttribute(“href”);
if (url == null || url.isEmpty() || !url.startsWith(“http”)) {
System.out.println(“Link is empty or not valid: ” + link.getText());
continue;
}
try {
HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
connection.setConnectTimeout(3000); // 3 seconds timeout
connection.connect();
int responseCode = connection.getResponseCode();
if (responseCode >= 400) {
System.out.println(“Broken link Identified: ” + url + ” – Code: ” + responseCode);
} else {
System.out.println(“Valid link: ” + url + ” – Code: ” + responseCode);
}
} catch (Exception e) {
System.out.println(“Exception thrown while checking link: ” + url + ” – with message: ” + e.getMessage());
}
}
driver.quit();
}
}