Migrating From the Universal Scraper API to the Idealista API
Making the switch from the Universal Scraper API to the specialized Idealista Scraper APIs significantly simplifies the scraping process. This guide walks you through migrating from the Universal Scraper API to the dedicated Idealista APIs. Throughout this guide, you’ll learn:
How to extract Idealista property data using the Universal Scraper API.
Steps to migrate to the dedicated Idealista Scraper APIs.
When scraping property data from Idealista with the Universal Scraper API, you configure the requests yourself and then process the returned data. To collect data from an Idealista property listing, set the appropriate request parameters:
Universal Scraper API
```python
# pip install requests
import requests

url = 'https://www.idealista.com/en/inmueble/107012340/'
apikey = 'YOUR_ZENROWS_API_KEY'

params = {
    'url': url,
    'apikey': apikey,
    'js_render': 'true',      # Enable JavaScript rendering
    'premium_proxy': 'true',  # Use premium proxy feature
}

response = requests.get('https://api.zenrows.com/v1/', params=params)

# Check if the request was successful
if response.status_code == 200:
    print(response.text)  # Print the raw HTML content
else:
    print(f"Request failed with status code {response.status_code}: {response.text}")
```
This code sends a request through the Universal Scraper API with the required parameters to fetch property data from Idealista.
Once the HTML content is obtained, the next step is to parse the webpage with BeautifulSoup to extract the relevant property details and transform them into a structured format.
Parsing Logic
```python
from bs4 import BeautifulSoup

def parse_property_html(html):
    if not html:
        print("No HTML to parse")
        return None
    try:
        soup = BeautifulSoup(html, "html.parser")

        # Property title
        title_tag = soup.select_one("span.main-info__title-main")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Price
        price_tag = soup.select_one("span.info-data-price")
        price = price_tag.get_text(strip=True).replace('\xa0', ' ') if price_tag else "N/A"

        # Area (square meters)
        area_tag = soup.select_one("div.info-features span:nth-of-type(1)")
        square_meters = area_tag.get_text(strip=True) if area_tag else "N/A"

        # Bedrooms
        bedrooms_tag = soup.select_one("div.info-features span:nth-of-type(2)")
        bedrooms = bedrooms_tag.get_text(strip=True) if bedrooms_tag else "N/A"

        # Bathrooms (from the details section)
        bathrooms_tag = soup.select_one("div.details-property_features li:-soup-contains('bathrooms')")
        if not bathrooms_tag:
            bathrooms_tag = soup.find("li", string=lambda text: text and "bathrooms" in text.lower())
        bathrooms = bathrooms_tag.get_text(strip=True) if bathrooms_tag else "N/A"

        processed_data = {
            "property_title": title,
            "price": price,
            "bedrooms": bedrooms,
            "bathrooms": bathrooms,
            "square_meters": square_meters
        }
        return processed_data
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return None
```
These CSS selectors are fragile and may break if the website structure changes, as websites like Idealista can update their HTML at any time. They require constant maintenance and monitoring to keep your scraper functional.
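One common mitigation, shown here as a minimal sketch rather than part of the original workflow, is to try several candidate selectors in order and log loudly when none match, so breakage surfaces quickly. The fallback selector in the usage comment is purely illustrative:

```python
def select_first(soup, selectors, label):
    # Try candidate CSS selectors in order and return the first match's text
    for selector in selectors:
        tag = soup.select_one(selector)
        if tag:
            return tag.get_text(strip=True)
    # No selector matched: likely a layout change worth investigating
    print(f"WARNING: no selector matched for '{label}'; the page layout may have changed")
    return "N/A"

# Usage inside parse_property_html (the second selector is a hypothetical fallback):
# price = select_first(soup, ["span.info-data-price", "span.price"], "price")
```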
After retrieving the data, save the parsed property details to a CSV file. CSV format makes it easy to share and analyze the information further.
Saving Data In A CSV File
```python
import csv

# ...

def save_to_csv(data, filename="idealista_property.csv"):
    if not data:
        print("No data to save")
        return
    try:
        # Save to CSV format
        with open(filename, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=data.keys())
            writer.writeheader()
            writer.writerow(data)
        print(f"Data saved to {filename}")
    except Exception as e:
        print(f"Error saving data to CSV: {e}")
```
This function writes the parsed property details to a CSV file for analysis and reference.
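If you later scrape several listings in one run, the same approach extends to a list of dictionaries. Here’s a minimal sketch, assuming every row shares the same keys:

```python
import csv

def save_many_to_csv(rows, filename="idealista_properties.csv"):
    # Write a list of property dicts to one CSV; assumes all rows share the same keys
    if not rows:
        print("No data to save")
        return
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=rows[0].keys())
        writer.writeheader()
        writer.writerows(rows)
    print(f"{len(rows)} rows saved to {filename}")
```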
Here’s the complete script that fetches, processes, and stores Idealista property data using the Universal Scraper API:
Python
```python
import requests
import csv
from bs4 import BeautifulSoup

property_url = "https://www.idealista.com/en/inmueble/107012340/"
apikey = "YOUR_ZENROWS_API_KEY"

# Step 1: API call
def get_property_html(property_url):
    params = {
        "url": property_url,
        "apikey": apikey,
        "js_render": "true",      # Enables JavaScript rendering
        "premium_proxy": "true"   # Uses premium proxies for better reliability
    }
    response = requests.get("https://api.zenrows.com/v1/", params=params)
    if response.status_code == 200:
        return response.text  # Return the raw HTML
    else:
        print(f"Request failed with status code {response.status_code}: {response.text}")
        return None

# Step 2: Parsing the HTML response
def parse_property_html(html):
    if not html:
        print("No HTML to parse")
        return None
    try:
        soup = BeautifulSoup(html, "html.parser")

        # Property title
        title_tag = soup.select_one("span.main-info__title-main")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Price
        price_tag = soup.select_one("span.info-data-price")
        price = price_tag.get_text(strip=True).replace('\xa0', ' ') if price_tag else "N/A"

        # Area (square meters)
        area_tag = soup.select_one("div.info-features span:nth-of-type(1)")
        square_meters = area_tag.get_text(strip=True) if area_tag else "N/A"

        # Bedrooms
        bedrooms_tag = soup.select_one("div.info-features span:nth-of-type(2)")
        bedrooms = bedrooms_tag.get_text(strip=True) if bedrooms_tag else "N/A"

        # Bathrooms (from the details section)
        bathrooms_tag = soup.select_one("div.details-property_features li:-soup-contains('bathrooms')")
        if not bathrooms_tag:
            bathrooms_tag = soup.find("li", string=lambda text: text and "bathrooms" in text.lower())
        bathrooms = bathrooms_tag.get_text(strip=True) if bathrooms_tag else "N/A"

        processed_data = {
            "property_title": title,
            "price": price,
            "bedrooms": bedrooms,
            "bathrooms": bathrooms,
            "square_meters": square_meters
        }
        return processed_data
    except Exception as e:
        print(f"Error parsing HTML: {e}")
        return None

# Step 3: Storing data in a CSV file
def save_to_csv(data, filename="idealista_property.csv"):
    if not data:
        print("No data to save")
        return
    try:
        # Save to CSV format
        with open(filename, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=data.keys())
            writer.writeheader()
            writer.writerow(data)
        print(f"Data saved to {filename}")
    except Exception as e:
        print(f"Error saving data to CSV: {e}")

# Everything together: full workflow
html_response = get_property_html(property_url)   # Step 1: Fetch the raw property HTML via the API
parsed_data = parse_property_html(html_response)  # Step 2: Parse the raw HTML into a structured format
save_to_csv(parsed_data)                          # Step 3: Save the structured data into a CSV file
```
The Idealista Scraper APIs deliver properly formatted real estate data through two specialized endpoints: the Idealista Property Data API and the Idealista Discovery API. These purpose-built solutions offer numerous improvements over the Universal Scraper API:
No need to maintain selectors or parsing logic: The Idealista APIs return structured data, so you don’t need to use BeautifulSoup, XPath, or fragile CSS selectors.
Maintenance-Free Operation: The APIs automatically adapt to Idealista website changes without requiring any code updates or parameter adjustments like js_render, premium_proxy, or autoparse.
Easier implementation: Specialized endpoints for Idealista data require much less code.
Higher data quality: Custom extraction algorithms that consistently deliver accurate data.
Predictable cost structure: Transparent pricing that helps plan for large-scale data collection.
The Idealista Property Data API delivers complete property data, including features, pricing, agent details, etc., in a ready-to-use format. Here’s how to implement the Idealista Property Data API:
Idealista Property Data API
```python
# pip install requests (csv is part of the Python standard library)
import requests
import csv

property_id = "107012340"
api_endpoint = "https://realestate.api.zenrows.com/v1/targets/idealista/properties/"

# Step 1: Fetch property data from the ZenRows Idealista API
def get_property_data(property_id):
    url = api_endpoint + property_id
    params = {
        "apikey": "YOUR_ZENROWS_API_KEY",
    }
    response = requests.get(url, params=params)
    if response.status_code == 200:
        return response.json()  # Return the full API response
    else:
        print(f"Request failed with status code {response.status_code}: {response.text}")
        return None

# Step 2: Save the property data to CSV
def save_property_to_csv(property_data, filename="idealista_property.csv"):
    if not property_data:
        print("No data to save")
        return
    # The API returns clean, structured data that can be saved directly
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        fieldnames = property_data.keys()
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow(property_data)
    print(f"Property data saved to {filename}")

# Step 3: Process and save
property_data = get_property_data(property_id)
save_property_to_csv(property_data)
```
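One practical caveat, offered as a hedged sketch since the exact response shape isn’t shown here: if the JSON happens to contain nested objects, csv.DictWriter will write their raw Python representation into a single cell. A small flattening helper keeps the CSV tidy:

```python
def flatten(data, parent_key="", sep="."):
    # Recursively flatten nested dicts into dotted keys,
    # e.g. {"price": {"amount": 350000}} -> {"price.amount": 350000}
    items = {}
    for key, value in data.items():
        new_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            items.update(flatten(value, new_key, sep=sep))
        else:
            items[new_key] = value
    return items

# Usage: save_property_to_csv(flatten(property_data))
```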
Running the Property Data API script above exports a CSV file containing all property details in an organized, ready-to-use format.

Well done! You’ve successfully transitioned to the Idealista Property Data API, which provides clean, structured property data without the complexity of parsing HTML.

Let’s now explore how the Idealista Discovery API simplifies searching and scraping properties across the platform.
The Idealista Discovery API lets you search for properties and returns essential information such as addresses, prices, room counts, property classifications, and links to detailed listings. The API also offers several optional settings to tailor your property searches (a sketch of how they might be passed follows the list):
Language: Specify the language for results (e.g., en for English, es for Spanish).
Page Number: Request specific search results pages rather than just the first page.
Sorting: Control how results are ordered (e.g., most_recent, highest_price, relevance).
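Here’s a minimal sketch of how these options might be passed as query parameters. The parameter names used below (language, page, sorting) are assumptions for illustration, not confirmed API fields; check the official API reference for the exact names and accepted values:

```python
import requests

# Hypothetical illustration: the option parameter names are assumptions
params = {
    "apikey": "YOUR_ZENROWS_API_KEY",
    "url": "https://www.idealista.com/en/venta-viviendas/barcelona/eixample/",
    "language": "en",          # assumed name for the results-language option
    "page": 2,                 # assumed name for the results-page option
    "sorting": "most_recent",  # assumed name for the ordering option
}
response = requests.get("https://realestate.api.zenrows.com/v1/targets/idealista/discovery/", params=params)
```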
Here’s how to implement the Idealista Discovery API:
Python
```python
# pip install requests
import requests
import csv

# Find properties by location
url = "https://www.idealista.com/en/venta-viviendas/barcelona/eixample/"
params = {
    "apikey": "YOUR_ZENROWS_API_KEY",
    "url": url,
}

response = requests.get("https://realestate.api.zenrows.com/v1/targets/idealista/discovery/", params=params)

if response.status_code == 200:
    data = response.json()
    properties = data.get("property_list", [])
    pagination_info = data.get("pagination", {})
    if properties:
        with open("idealista_search_results.csv", mode="w", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=properties[0].keys())
            writer.writeheader()
            writer.writerows(properties)
        print(f"{len(properties)} properties saved to idealista_search_results.csv")
        print(f"Current page: {pagination_info.get('current_page')}")
        if 'next_page' in pagination_info:
            print(f"Next page URL: {pagination_info.get('next_page')}")
    else:
        print("No properties found in search results")
else:
    print(f"Request failed with status code {response.status_code}: {response.text}")
```
This code produces a CSV file containing the property listings from the search results. To collect more than the first page, you can follow the next_page URL returned in the pagination object, as sketched below.
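Here’s a hedged sketch of such a loop. It assumes next_page holds a full Idealista URL that the Discovery endpoint accepts, which matches the snippet above but should be verified against the docs:

```python
import requests
import csv

API_ENDPOINT = "https://realestate.api.zenrows.com/v1/targets/idealista/discovery/"
APIKEY = "YOUR_ZENROWS_API_KEY"

def fetch_all_pages(start_url, max_pages=3):
    # Follow pagination by feeding each next_page URL back into the Discovery endpoint.
    # max_pages caps the number of requests while testing.
    all_properties = []
    url = start_url
    for _ in range(max_pages):
        response = requests.get(API_ENDPOINT, params={"apikey": APIKEY, "url": url})
        if response.status_code != 200:
            print(f"Request failed with status code {response.status_code}")
            break
        data = response.json()
        all_properties.extend(data.get("property_list", []))
        url = data.get("pagination", {}).get("next_page")
        if not url:
            break  # no further pages advertised
    return all_properties

properties = fetch_all_pages("https://www.idealista.com/en/venta-viviendas/barcelona/eixample/")
if properties:
    with open("idealista_all_pages.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=properties[0].keys())
        writer.writeheader()
        writer.writerows(properties)
    print(f"{len(properties)} properties saved to idealista_all_pages.csv")
```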
The shift from the Universal Scraper API to Idealista Scraper APIs provides substantial improvements to how you collect and process real estate data. These dedicated tools eliminate the need for complex HTML parsing, dramatically reduce ongoing maintenance, and provide higher quality data, all while automatically adapting to any changes on the Idealista website.