Extract Links from an HTML File in Node.JS
Do you need to extract data from an HTML file? If so, we have an API that can easily assist in extracting resolved URLs (fully qualified if possible) from an input HTML file. To ensure the process runs smoothly you will need either the HTML file or the URL of a file to perform the operation on; you have the option to include the base URL of the page as well.
To get things started in Node.JS, we will run this command to install the SDK:
npm install cloudmersive-convert-api-client --save
Or, add this snippet to your package.json:
"dependencies": {
"cloudmersive-convert-api-client": "^2.6.3"
}
Our next step is to create an instance of the API and call the function with the following code:
// Example: extract the links from an HTML document with the Cloudmersive
// Convert API client and print the result.
const fs = require('fs'); // needed for reading the input file below
const CloudmersiveConvertApiClient = require('cloudmersive-convert-api-client');

const defaultClient = CloudmersiveConvertApiClient.ApiClient.instance;

// Configure API key authorization: Apikey
const Apikey = defaultClient.authentications['Apikey'];
Apikey.apiKey = 'YOUR API KEY';

const apiInstance = new CloudmersiveConvertApiClient.EditHtmlApi();

const opts = {
  // File | Optional: Input file to perform the operation on.
  // fs.readFileSync already returns a Buffer, so it is passed as-is.
  // Re-wrapping its `.buffer` (the backing ArrayBuffer) ignores the Buffer's
  // byteOffset/length and can include bytes that are not part of the file.
  'inputFile': fs.readFileSync("C:\\temp\\inputfile"),
  'inputFileUrl': "inputFileUrl_example", // String | Optional: URL of a file to operate on as input.
  'baseUrl': "baseUrl_example" // String | Optional: Base URL of the page, such as https://mydomain.com
};

/**
 * Callback passed to the API call; logs either the error or the returned data.
 *
 * @param {*} error - Truthy when the call failed.
 * @param {*} data - Result returned by the API on success.
 * @param {*} response - Third callback argument supplied by the client (unused here).
 */
const callback = function(error, data, response) {
  if (error) {
    console.error(error);
    return;
  }
  // Serialize the result: plain string concatenation would print
  // "[object Object]" for an object instead of its contents.
  console.log('API called successfully. Returned data: ' + JSON.stringify(data));
};

apiInstance.editHtmlHtmlGetLinks(opts, callback);
In no time at all, the returned information will display a list of the located links and their corresponding names/URLs. To retrieve your personal API key, head to the Cloudmersive website to register for a free account that will give you access to 800 monthly calls across our library of APIs.