I'm trying to scrape the news website "https://inshorts.com/en/read/national", but I'm only getting results for the articles that are initially displayed. I need all the articles on the site that contain a given word (e.g. "COVID-19"), without having to use the "load more" button.
Here's my code, which gives only the currently displayed articles:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# (this initial request is unused; soup is rebuilt for each url in the loop below)
dummy_url = "https://inshorts.com/en/read/badminton"
data_dummy = requests.get(dummy_url)
soup = BeautifulSoup(data_dummy.content, 'html.parser')

urls = ["https://inshorts.com/en/read/national"]
news_data_content, news_data_title, news_data_category, news_data_time = [], [], [], []

for url in urls:
    category = url.split('/')[-1]
    data = requests.get(url)
    soup = BeautifulSoup(data.content, 'html.parser')

    news_title = []
    news_content = []
    news_category = []
    news_time = []

    # each news card is split across a title block, a content block and an author/time block
    for headline, article, time in zip(
            soup.find_all('div', class_=["news-card-title news-right-box"]),
            soup.find_all('div', class_=["news-card-content news-right-box"]),
            soup.find_all('div', class_=["news-card-author-time news-card-author-time-in-title"])):
        news_title.append(headline.find('span', attrs={'itemprop': "headline"}).string)
        news_content.append(article.find('div', attrs={'itemprop': "articleBody"}).string)
        news_time.append(time.find('span', class_="date").string)
        news_category.append(category)

    news_data_title.extend(news_title)
    news_data_content.extend(news_content)
    news_data_category.extend(news_category)
    news_data_time.extend(news_time)

df1 = pd.DataFrame(news_data_title, columns=["Title"])
df2 = pd.DataFrame(news_data_content, columns=["Content"])
df3 = pd.DataFrame(news_data_category, columns=["Category"])
df4 = pd.DataFrame(news_data_time, columns=["time"])
df = pd.concat([df1, df2, df3, df4], axis=1)

def name():
    a = input("File Name: ")
    return a

b = name()
df.to_csv(b + ".csv")
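The keyword filter itself isn't the hard part; once all the articles are collected, I assume it could be done on the DataFrame roughly like this (just a sketch using the Title/Content columns built above, with "COVID-19" as the example keyword):

keyword = "COVID-19"
# keep rows whose title or content mention the keyword (case-insensitive)
mask = (df["Title"].str.contains(keyword, case=False, na=False)
        | df["Content"].str.contains(keyword, case=False, na=False))
covid_articles = df[mask]
print(covid_articles)

The real problem is that the page only serves the first batch of articles, so df never contains the older ones.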