""" Note: This Program fillters out most of the good links and collect links that returned error save them in separate file Some of the links may work in browser but not in python due to security of webpage So a manual check on those filtered record is needed Since the program already filtered most of the good links we can easily check the reaming link and save time This program takes a while depending on internet speed Instruction: Download the bad_link_filter and readme as raw file Then execute in your machine The bad links will be saved in error.txt file Then you have to manually check the links mentioned in error.txt file and remove the good links from the file """ def is_url_working(url): #Check the status code of webpage import requests try: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', 'Accept-Encoding': 'gzip,deflate', 'Connection': 'keep-alive', 'Access-Control-Allow-Methods': 'POST', 'Access-Control-Allow-Headers': 'X-PINGOTHER, Content-Type', 'Pragma': 'no-cache', 'Cache-Control': 'no-cache', } proxies = {"http": None,"https": None} response=requests.get(url,headers=headers,proxies=proxies) status=response.status_code if status>=400: return status except requests.exceptions.ConnectionError as ce: return 'HTTPSConnectionPool error' except Exception as e: return e def func(indexes): error_links=[] print('InProgress, Sections completed will be shown below.Please wait for a while') for index,section in indexes.items(): for title,row in section.items(): error=is_url_working(row['link']) if error: e={ 'index':index, 'title':title, 'link': row['link'], 'error':error } error_links.append(e) print(index,' section completed') return error_links def get_lines_from_file(location): #open,read,return lines after filtering empty lines and spaces lines=[] with open(location,'r') as file: lines=[line.strip() for line in file.readlines() if line.strip()] return lines def line_to_dict(line): #covert api row to dict line=line.strip().split('|') name,link=line[1].strip().split('](') name,link=name[1:],link[:-1] row={ 'link':link, 'description':line[2], 'auth':line[3], 'https':line[4], 'cors':line[5], } return name,row def section_to_dict(lines,ind): #convert section to dict section={} while ind