117 lines
4.5 KiB
Python
117 lines
4.5 KiB
Python
|
"""
|
||
|
Note:
|
||
|
This program filters out most of the good links and collects the links that returned an error, saving them in a separate file
|
||
|
Some of the links may work in a browser but not from Python because of the web page's security measures
|
||
|
So a manual check of those filtered records is needed
|
||
|
Since the program has already filtered out most of the good links, we can easily check the remaining links and save time
|
||
|
This program takes a while depending on internet speed
|
||
|
|
||
|
Instruction:
|
||
|
Download bad_link_filter and the readme as raw files
|
||
|
Then execute in your machine
|
||
|
The bad links will be saved in error.txt file
|
||
|
Then you have to manually check the links mentioned in error.txt file and remove the good links from the file
|
||
|
|
||
|
"""
|
||
|
|
||
|
def is_url_working(url, timeout=30):
    """Request *url* and report what went wrong, if anything.

    A GET request is sent with browser-like headers. The result is falsy
    (``None``) when the link looks fine and truthy when it needs attention.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host could stall the whole run.

    Returns:
        ``None`` when the response status code is below 400.
        The integer status code when it is 400 or above.
        The string ``'HTTPSConnectionPool error'`` when requests raises a
        ``ConnectionError``.
        The exception instance itself for any other failure.
    """
    import requests

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Encoding': 'gzip,deflate',
        'Connection': 'keep-alive',
        'Access-Control-Allow-Methods': 'POST',
        'Access-Control-Allow-Headers': 'X-PINGOTHER, Content-Type',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    # NOTE(review): explicit None entries presumably disable any configured
    # proxy for both schemes -- confirm against the requests documentation.
    proxies = {"http": None, "https": None}

    # Only the network call sits inside the try: it is the part that can fail.
    try:
        response = requests.get(
            url, headers=headers, proxies=proxies, timeout=timeout
        )
    except requests.exceptions.ConnectionError:
        return 'HTTPSConnectionPool error'
    except Exception as e:
        # Returned rather than raised so the caller can record it next to the link.
        return e

    status = response.status_code
    if status >= 400:
        return status
    return None
|
||
|
|
||
|
def func(indexes):
    """Check every link in *indexes* and collect the ones that report a problem.

    Args:
        indexes: Mapping of section name -> mapping of API title -> row dict.
            Each row dict must provide a ``'link'`` entry.

    Returns:
        A list of dicts with the keys ``'index'`` (section name), ``'title'``,
        ``'link'`` and ``'error'`` (the truthy value returned by
        ``is_url_working``), in the order the links were visited.
    """
    print('InProgress, Sections completed will be shown below.Please wait for a while')
    failures = []
    for section_name, entries in indexes.items():
        for api_title, details in entries.items():
            url = details['link']
            outcome = is_url_working(url)
            if not outcome:
                # Falsy result: nothing to report for this link.
                continue
            failures.append(
                dict(index=section_name, title=api_title, link=url, error=outcome)
            )
        # Progress marker, printed once all links of the section were tried.
        print(section_name, ' section completed')
    return failures
|
||
|
|
||
|
def get_lines_from_file(location):
    """Return the non-blank lines of the text file at *location*.

    Every line is stripped of leading and trailing whitespace; lines that are
    empty after stripping are dropped.

    Args:
        location: Path of the file to read.

    Returns:
        A list of stripped, non-empty lines in file order.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(location, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        return [line for line in stripped if line]
|
||
|
|
||
|
def line_to_dict(line):
    """Turn one pipe-separated API table row into ``(name, row)``.

    The line is split on ``'|'``. The cell at index 1 is expected to look like
    ``[name](link)``; the cells at indexes 2-5 are stored untouched
    (surrounding whitespace included).

    Args:
        line: A single table row.

    Returns:
        A tuple ``(name, row)`` where *row* has the keys ``'link'``,
        ``'description'``, ``'auth'``, ``'https'`` and ``'cors'``.

    Raises:
        IndexError: If the line has too few ``'|'``-separated cells.
        ValueError: If the API cell does not contain ``']('`` exactly once.
    """
    cells = line.strip().split('|')
    label, target = cells[1].strip().split('](')
    row = {
        'link': target[:-1],  # drop the last character (the closing ')')
        'description': cells[2],
        'auth': cells[3],
        'https': cells[4],
        'cors': cells[5],
    }
    # label[1:] drops the first character (the opening '[').
    return label[1:], row
|
||
|
|
||
|
def section_to_dict(lines, ind):
    """Parse consecutive table rows starting at ``lines[ind]`` into a dict.

    Parsing stops at the first line containing ``'Back to Index'`` or at the
    end of *lines*, whichever comes first.

    Args:
        lines: All lines of the document.
        ind: Index of the first row to parse.

    Returns:
        A tuple ``(stop, section)``: *stop* is the index at which parsing
        stopped, and *section* maps each API name to its row dict as produced
        by ``line_to_dict``.
    """
    parsed = {}
    position = ind
    end = len(lines)
    while position < end and 'Back to Index' not in lines[position]:
        title, entry = line_to_dict(lines[position])
        parsed[title] = entry  # a repeated title overwrites the earlier row
        position += 1
    return position, parsed
|
||
|
|
||
|
def get_section_wise_dict(lines):
    """Group the document's table rows by section heading.

    A line containing ``'###'`` opens a section. Its name is the line with the
    first three characters removed and then stripped. The rows are read by
    ``section_to_dict`` starting three lines after the heading.

    Args:
        lines: Non-blank, stripped lines of the document.

    Returns:
        A dict mapping section name -> dict of rows for that section.
    """
    sections = {}
    cursor = 0
    while cursor < len(lines):
        current = lines[cursor]
        if '###' not in current:
            cursor += 1
            continue
        heading = current[3:].strip()
        # Start three lines past the heading -- presumably skipping the table
        # header and its separator row; verify against the input format.
        stop, rows = section_to_dict(lines, cursor + 3)
        sections[heading] = rows
        # Resume on the line after the one where the section stopped.
        cursor = stop + 1
    return sections
|
||
|
|
||
|
def link_to_error_file(error_links):
    """Write the failing links to ``error.txt`` as a pipe-delimited table.

    The file is created (or overwritten) in the current working directory and
    holds a heading, a table header and one row per entry. Two status messages
    are printed once the file has been written.

    Args:
        error_links: Iterable of dicts with the keys ``'index'``, ``'title'``,
            ``'link'`` and ``'error'``.
    """
    rows = [
        '| {} | [{}]({}) | {} |'.format(
            row['index'], row['title'], row['link'], str(row['error'])
        )
        for row in error_links
    ]
    # Write UTF-8 explicitly: titles and error text may contain characters the
    # platform's default encoding cannot represent.
    with open('error.txt', 'w', encoding='utf-8') as file:
        file.write('\n#Manual check has to be done on following links#\n\n')
        file.write('| Section | API |Error/Satus Code |\n')
        file.write('|---|---|---|\n')
        file.writelines(row + '\n' for row in rows)
    print("Written to file")
    print('Manual check has to be done for the links saved in error.txt')
|
||
|
|
||
|
# Script flow: ask for the path of the raw readme, parse it into sections,
# probe every link, then write the failing ones to error.txt.
location = input('Location of readme public api readme file: ')
lines = get_lines_from_file(location)
indexes = get_section_wise_dict(lines)
error_links = func(indexes)
link_to_error_file(error_links)
|