FreeApi/.github/broken-link-collectorr.py
iamjoker021 94d65bc059
Remove broken links (#16)
* Add broken-links to file

This file contains the list of broken links removed from readme.md.
Some links may be temporarily down or may have moved,
so we can investigate these links and restore working ones in the future.

* Automated broken-link collector

To use this, download readme.md (as a raw file) and broken_link_finder.py and run them on your machine.
The program scans the entire readme.md (raw file) and collects the broken links in error.txt.
Some webpages return an error because of bot-protection features but work fine when tried manually in a browser,
so a manual check is needed only on the filtered links saved in error.txt.
This program thus narrows down the search for broken links.

* Remove broken-links

There are a total of 70 links that don't work, whether temporarily or permanently.
Some webpages may have moved to new addresses.
So I removed them from readme.md and saved them in broken-links.md for future reference.
2021-08-19 12:09:02 +09:00

"""
Note:
This Program fillters out most of the good links and collect links that returned error save them in separate file
Some of the links may work in browser but not in python due to security of webpage
So a manual check on those filtered record is needed
Since the program already filtered most of the good links we can easily check the reaming link and save time
This program takes a while depending on internet speed
Instruction:
Download the bad_link_filter and readme as raw file
Then execute in your machine
The bad links will be saved in error.txt file
Then you have to manually check the links mentioned in error.txt file and remove the good links from the file
"""
import requests  # moved to module level; it was originally imported inside the function

def is_url_working(url):  # Check the status code of a webpage
    # Returns a truthy value (status code, message, or exception) when the link
    # looks broken, and falls through to an implicit None when it works.
    try:
        # Browser-like request headers; the Access-Control-Allow-* entries are
        # normally response headers but are kept here as in the original.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Encoding': 'gzip,deflate',
            'Connection': 'keep-alive',
            'Access-Control-Allow-Methods': 'POST',
            'Access-Control-Allow-Headers': 'X-PINGOTHER, Content-Type',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
        }
        proxies = {"http": None, "https": None}
        # timeout added so a hanging server cannot stall the whole scan
        response = requests.get(url, headers=headers, proxies=proxies, timeout=30)
        status = response.status_code
        if status >= 400:
            return status
    except requests.exceptions.ConnectionError:
        return 'HTTPSConnectionPool error'
    except Exception as e:
        return e
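# Usage sketch (hypothetical URLs): a falsy return means the link is fine,
# a truthy one is the error indicator, e.g.:
#   is_url_working('https://example.com')              -> None (working)
#   is_url_working('https://example.com/missing-page') -> 404
#   is_url_working('https://no-such-host.invalid')     -> 'HTTPSConnectionPool error'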
def collect_error_links(indexes):  # formerly `func`; walk every section and test each link
    error_links = []
    print('In progress; completed sections will be listed below. Please wait a while.')
    for index, section in indexes.items():
        for title, row in section.items():
            error = is_url_working(row['link'])
            if error:
                e = {
                    'index': index,
                    'title': title,
                    'link': row['link'],
                    'error': error,
                }
                error_links.append(e)
        print(index, 'section completed')
    return error_links
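# `indexes` is the nested dict produced by get_section_wise_dict below; its shape,
# with hypothetical data:
#   {'Animals': {'Cat Facts': {'link': 'https://catfact.ninja',
#                              'description': ' Daily cat facts ', ...}}}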
def get_lines_from_file(location):  # Open and read the file, dropping blank lines and surrounding spaces
    with open(location, 'r') as file:
        lines = [line.strip() for line in file.readlines() if line.strip()]
    return lines
def line_to_dict(line):  # Convert an API table row to a dict
    line = line.strip().split('|')
    name, link = line[1].strip().split('](')
    name, link = name[1:], link[:-1]  # strip the leading '[' and trailing ')'
    row = {
        'link': link,
        'description': line[2],
        'auth': line[3],
        'https': line[4],
        'cors': line[5],
    }
    return name, row
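# Worked example (hypothetical row; note the cell values keep their padding spaces):
#   line_to_dict('| [Cat Facts](https://catfact.ninja) | Daily cat facts | No | Yes | No |')
# returns:
#   ('Cat Facts', {'link': 'https://catfact.ninja', 'description': ' Daily cat facts ',
#                  'auth': ' No ', 'https': ' Yes ', 'cors': ' No '})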
def section_to_dict(lines, ind):  # Convert one readme section into a dict of rows
    section = {}
    while ind < len(lines):
        if 'Back to Index' in lines[ind]:  # 'Back to Index' marks the end of a section
            break
        name, row = line_to_dict(lines[ind])
        section[name] = row
        ind += 1
    return ind, section
def get_section_wise_dict(lines):  # Convert the flat list of lines into a section-wise dict
    ind = 0
    indexes = {}
    while ind < len(lines):
        if '###' in lines[ind]:  # a '###' heading enters a section
            name = lines[ind][3:].strip()
            # ind + 3 skips the heading plus the two table-header lines
            ind, indexes[name] = section_to_dict(lines, ind + 3)
        ind += 1
    return indexes
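# The readme layout this walker assumes, after blank lines are stripped
# (hypothetical excerpt):
#   ### Animals
#   | API | Description | Auth | HTTPS | CORS |
#   |---|---|---|---|---|
#   | [Cat Facts](https://catfact.ninja) | Daily cat facts | No | Yes | No |
#   [Back to Index](#index)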
def link_to_error_file(error_links):  # Write the bad links to a file for further manual checking
    lines = []
    for row in error_links:
        statement = '| {} | [{}]({}) | {} |'.format(row['index'], row['title'], row['link'], str(row['error']))
        lines.append(statement)
    with open('error.txt', 'w') as file:
        file.write('\n# Manual check has to be done on the following links #\n\n')
        file.write('| Section | API | Error/Status Code |\n')
        file.write('|---|---|---|\n')
        for line in lines:
            file.write(line)
            file.write('\n')
    print('Written to file')
    print('Manual check has to be done for the links saved in error.txt')
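# error.txt ends up as a small markdown table, e.g. (hypothetical entry):
#   | Section | API | Error/Status Code |
#   |---|---|---|
#   | Animals | [Cat Facts](https://catfact.ninja) | 404 |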
if __name__ == '__main__':
    location = input('Location of the raw public-apis readme file: ')  # Get the location of the raw readme file
    lines = get_lines_from_file(location)
    indexes = get_section_wise_dict(lines)
    error_links = collect_error_links(indexes)
    link_to_error_file(error_links)
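# Typical run, assuming the raw readme has been saved locally:
#   $ python broken-link-collectorr.py
#   Location of the raw public-apis readme file: readme.md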