Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)
Paste
Pasted as Python by Mohammad ( 7 years ago )
import csv
import json
import os
from collections import Counter
from difflib import SequenceMatcher
from itertools import chain

import requests.structures
from bs4 import BeautifulSoup
from haralyzer import HarParser, HarPage
from py2neo import Graph as grph
from py2neo import Node, Relationship
from requests_toolbelt.multipart import decoder

import diffcomp
# --- Script setup -----------------------------------------------------------
# Connects to Neo4j, loads the HAR capture, declares the module-level state the
# rest of the script mutates, and starts a fresh output.csv with its header.

# NOTE(review): credentials are hard-coded; consider reading them from the
# environment before sharing this script.
graph = grph("bolt://localhost:7687", user="neo4j", password="test")
# graph = grph("bolt://localhost:7688", user="neo4j", password="pass")

# HAR files are JSON; read as UTF-8 explicitly so the platform default
# encoding cannot break the load.
with open('dumpelgg.har', 'r', encoding='utf-8') as f:
    har_parser = HarParser(json.load(f))

# Parameter names that identify credential / identity fields.
# (The previous `'username' or 'password' or ...` chain evaluated to just
# the string 'username'.)
ignore = ('username', 'password', 'author', 'email', 'e-mail', 'login')

# State carried from one HAR entry to the next by the main loop.
param_names_formatted = []
param_values_formatted = []
post_req_url = ''
y_reURL_check = ''  # redirectURL of the previously processed entry
temp = 0
datardf = ''
y_url_list = []
y_compare_content = dict()  # URL -> list of response bodies seen for it
check = dict()
y_tag_compare = dict()
dict_action = dict()  # can be removed
dict_list = dict()
dict_dict_list = dict()
dict_l = dict()
list_final = []
g_req = 1
param_total = {}
param_total_graph = {}
y_param_dict = {}

# Graph bookkeeping, keyed by parameter value or by 'Request_<index>'.
node_dict = {}
node_response = {}
request_number = ''
cn = 0
param_node_li = []
request_node_dict = {}
do_dict = {}
do_req_dict = {}

# Remove the outputs of a previous run, then write the CSV header.
csv_file = 'output.csv'
for stale_path in ('result.txt', csv_file):
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# newline='' is what the csv module expects; without it every row is followed
# by an empty line on Windows.
with open(csv_file, 'a', newline='') as output:
    writer = csv.writer(output)
    writer.writerow(['User_Data', 'Meta_Data'])

do_count = 0  # running number of Data_object nodes created
# --- Main pass over the HAR entries ------------------------------------------
# NOTE(review): the pasted source had lost all indentation. The nesting below
# is reconstructed from the data flow (see the individual notes) and should be
# confirmed against the author's copy.


def _strip_blank_lines(text):
    """Return *text* without surrounding whitespace and without blank lines."""
    return "".join(line for line in text.strip().splitlines(True) if line.strip())


def _parse_post_params(post_data):
    """Return the fields of a HAR ``postData`` object as name/value dicts.

    A non-empty ``params`` list is returned as is. Otherwise ``text`` is
    parsed: JSON bodies yield one entry per top-level key (values passed
    through ``str``), every other body is decoded as multipart/form-data.
    """
    params = post_data.get('params')
    if params:
        return params

    body = post_data['text']
    # Compare the media type only, so "application/json" and
    # "application/json; charset=UTF-8" are recognised as well.
    media_type = post_data['mimeType'].split(';')[0].strip().lower()
    if media_type == 'application/json':
        payload = json.loads(body)
        return [{"name": key, "value": str(payload[key])} for key in payload]

    # The first body line is "--" + boundary; dropping the two leading dashes
    # recovers the boundary even when it contains dashes of its own.
    boundary = body.partition('\r\n')[0][2:]
    content_type = "multipart/form-data; boundary=%s" % boundary
    mpd = decoder.MultipartDecoder(body.encode(), content_type)

    fields = []
    for part in mpd.parts:
        headers = requests.structures.CaseInsensitiveDict(part.headers)
        # Only Content-Disposition carries the field name; other part headers
        # (e.g. Content-Type) must not overwrite it.
        disposition = headers.get(b'Content-Disposition')
        if disposition is None:
            continue
        # NOTE(review): this keeps the text after the first '=', so the
        # surrounding double quotes stay part of the name -- confirm that
        # downstream consumers expect that.
        name = disposition.decode("utf-8").split('=')[1]
        fields.append({"name": name, "value": part.content.decode("utf-8")})
    return fields


for i, entry in enumerate(har_parser.har_data['entries']):
    request = entry['request']
    response = entry['response']
    y_url = request['url']
    y_referrer = request['headers']
    y_reURL = response['redirectURL']
    # `text` is optional in HAR content objects.
    y_resContent = response['content'].get('text', '')
    request_key = 'Request' + '_' + str(i)

    # Remember the first body seen for every URL.
    y_compare_content.setdefault(y_url, [y_resContent])

    request_node = Node('Request_Node', name='Request', req_no=i)
    request_node_dict[request_key] = request_node
    request_url = Node('Request_URL', name=y_url)
    graph.create(Relationship(request_node, "hasURL", request_url))

    # This entry is the target of the previous entry's redirect. The fragment
    # (the part after '#') is never sent to the server, so it is dropped
    # before comparing.
    if y_reURL_check.partition('#')[0] == y_url:
        # todo Find a way to ignore the unwanted params
        # Skip forms that carry credentials. (The previous
        # `'username' and 'password' not in ...` only tested 'password'.)
        has_credentials = ('username' in param_names_formatted
                           or 'password' in param_names_formatted)
        if param_values_formatted and not has_credentials:
            for header in y_referrer:
                if header.get('name') != 'Referer':
                    continue
                referer = header.get('value')
                y_url_list.append((referer, y_resContent))
                if y_reURL:
                    # Still inside a redirect chain; wait for the final page.
                    continue

                param_node = Node('Param_value', name=param_values_formatted)
                param_node_li.append(param_node)
                param_object_node = Node("Param_object", name="Params", param_no=i)
                do_count = do_count + 1
                do_req_dict[request_key] = do_count
                data_object_node = Node("Data_object", name="Data object", do_no=do_count)
                do_dict.setdefault(request_key, data_object_node)
                # Created once; the source issued this same create twice.
                graph.create(Relationship(request_node, "hasParam", param_object_node))

                for param in y_param_dict:
                    param_value_node = Node('param_node_values', name=param["value"])
                    print(param_value_node)
                    # One node per distinct value: reuse the registered node
                    # when the value was seen before.
                    shared_node = node_dict.setdefault(param["value"], param_value_node)
                    graph.create(Relationship(param_object_node,
                                              param["name"],
                                              shared_node))
                    graph.create(Relationship(data_object_node,
                                              'data-level',
                                              shared_node, data_object=do_count))

                response_node = Node('Node_response', name='Response', resno=i)
                request_number = request_key
                node_response[request_number] = response_node
                graph.create(Relationship(request_node, "hasResponse", response_node))

                # File this response body under the referring page.
                y_compare_content.setdefault(referer, []).append(y_resContent)

                # Check difference and compare
                diffcomp.compare(referer, param_values_formatted,
                                 y_compare_content, y_tag_compare, post_req_url, dict_list, dict_dict_list,
                                 y_param_dict, dict_action, dict_l, list_final, request_number)

                # NOTE(review): index 2 presumably skips two leading hidden
                # fields -- confirm. Guarded so shorter forms no longer raise
                # IndexError.
                if len(param_values_formatted) > 2:
                    soup_param = BeautifulSoup(param_values_formatted[2], 'html.parser')
                    param_total.setdefault(post_req_url, []).append(soup_param.text.replace(" ", "_"))
                param_total_graph.setdefault(post_req_url, []).append(param_values_formatted)
                g_req += 1

        # NOTE(review): the nesting level of this reset is a best guess; it is
        # placed here so parameters survive a redirect chain and are cleared
        # once its final response was handled.
        if not y_reURL:
            param_names_formatted = []
            param_values_formatted = []
            y_param_dict = {}

    # Collect the parameters this entry posted, for the following entry.
    if 'postData' in request:
        y_param = _parse_post_params(request['postData'])
        post_req_url = y_url
        y_param_dict = y_param
        for posted in y_param:
            param_names_formatted.append(_strip_blank_lines(posted.get('name')))
            param_values_formatted.append(_strip_blank_lines(posted.get('value')))

    y_reURL_check = y_reURL
# Reduce every entry of dict_dict_list to (entry[0], first item of each
# element of entry[1]), grouped under the same key.
# NOTE(review): dict_dict_list is filled by diffcomp.compare; entry[0] is
# presumably the request label and entry[1] a list of tag records -- confirm.
dict_dict_l = {}
for group_key, group_entries in dict_dict_list.items():
    print(group_key)
    for entry in group_entries:
        first_items = [record[0] for record in entry[1]]
        dict_dict_l.setdefault(group_key, []).append((entry[0], first_items))
# For every key, fold the tag lists of all its entries into the longest
# contiguous run they share: start from the first list and repeatedly keep
# the longest block it has in common with the next one.
# test_dict[key] is a one-element list holding that run.
common_dom = []
test_dict = {}
for key, entries in dict_dict_l.items():
    print(key)
    if not entries:
        # Nothing to compare; do not reuse the previous key's result.
        continue
    shared_tags = entries[0][1]
    for _, tags in entries[1:]:
        lss = SequenceMatcher(None, shared_tags, tags).find_longest_match(
            0, len(shared_tags), 0, len(tags))
        shared_tags = shared_tags[lss.a:lss.a + lss.size]
    common_dom = [shared_tags]
    test_dict[key] = common_dom
# For every entry, locate the longest block its tag list shares with the
# key's common run (test_dict[key][0]); the Match objects are collected per
# key in entry order.
matches = {}
for group_key, group_entries in dict_dict_l.items():
    print(group_key)
    for _, entry_tags in group_entries:
        common_run = test_dict[group_key][0]
        matcher = SequenceMatcher(None, entry_tags, common_run)
        found = matcher.find_longest_match(0, len(entry_tags), 0, len(common_run))
        print(found)
        matches.setdefault(group_key, []).append(found)
    # NOTE(review): source indentation was lost; the separator is assumed to
    # be printed once per key.
    print("\n")
# Cut every entry's record list (lf[1]) down to the window described by its
# Match object: fl_dict[key] collects (lf[0], window) pairs, filtered_list
# collects the bare windows.
filtered_list = []
fl_dict = {}
for key, entries in dict_dict_list.items():
    print(key)
    # `matches` only has keys with at least one entry; default to an empty
    # list so zip() is never handed None for a key without entries.
    for lf, match in zip(entries, matches.get(key, [])):
        window = lf[1][match.a:match.a + match.size]
        filtered_list.append(window)
        fl_dict.setdefault(key, []).append((lf[0], window))
        print(fl_dict)
        print([window])
# Flatten every window in fl_dict into "name:value" strings.
# Each record in a window is iterated; for every member that is exactly a
# tuple, the mapping in its first slot contributes one string per key.
# final_dict[key] collects (label, strings) pairs, `final` the bare lists.
fl_sub = []
final = []
final_dict = {}
for group_key, labelled_windows in fl_dict.items():
    for label, window in labelled_windows:
        attr_strings = []
        for record in window:
            for member in record:
                if type(member) is tuple:
                    attr_map = member[0]
                    for attr_name in attr_map:
                        attr_strings.append(attr_name + ':' + str(attr_map[attr_name]))
        final.append(attr_strings)
        final_dict.setdefault(group_key, []).append((label, attr_strings))
# Per key, keep only the attribute strings that occur in exactly one of the
# key's lists (a string repeated inside a single list still counts once).
# final_dt[key] collects ([label], unique_strings) pairs in entry order.
# Counter and chain are imported at the top of the file.
final_dt = {}
for k, labelled_attrs in final_dict.items():
    counts = Counter(chain.from_iterable(set(attrs) for _, attrs in labelled_attrs))
    for label, attrs in labelled_attrs:
        unique_attrs = [attr for attr in attrs if counts[attr] == 1]
        final_dt.setdefault(k, []).append(([label], unique_attrs))
# todo remove
# counts = Counter(chain(*map(set, final)))
# ul = [[i for i in sublist if counts[i] == 1] for sublist in final]
# for u in ul:
# print(u)
# print("\n")
# Link every unique "name:value" string in final_dt to the graph.
# The text before the first ':' becomes the relationship type, the text after
# it the value. A value already registered in node_dict reuses that node;
# any other value gets a fresh attr_values node, which is also recorded in
# value_node_dict.
meta_dict = {}         # value -> every attr_values node built for it
local_dom_list = []    # every ([label], strings) pair processed
value_node_dict = {}   # value -> attr_values node, for values not in node_dict
for k in final_dt:
    for val in final_dt.get(k):
        print(val)
        local_dom_list.append(val)
        request_label = val[0][0]
        for attr in val[1]:
            attr_name, _, att_val = str(attr).partition(':')
            value_node = Node("attr_values", name=att_val)
            meta_dict.setdefault(att_val, []).append(value_node)
            if att_val in node_dict:
                target_node = node_dict.get(att_val)
            else:
                value_node_dict[att_val] = value_node
                target_node = value_node
            graph.create(Relationship(node_response.get(request_label),
                                      attr_name,
                                      target_node))
            graph.create(Relationship(do_dict.get(request_label),
                                      'data-level',
                                      target_node,
                                      data_object=do_req_dict.get(request_label)))
# GET Requests (after Insertion Request)
# Record the index of every entry whose URL equals the previous entry's
# redirectURL (fragment dropped), i.e. every entry reached through a redirect.
# Only the two fields that are compared are read, so entries lacking optional
# fields such as content.text no longer raise KeyError here.
index_list = []
y_reURL_check = ''
for i, entry in enumerate(har_parser.har_data['entries']):
    if y_reURL_check.partition('#')[0] == entry['request']['url']:
        index_list.append(i)
    y_reURL_check = entry['response']['redirectURL']
# Walk the entries from last to first. For every GET entry that is not listed
# in index_list, parse the response body as HTML and link each attribute of
# each tag to the entry's GET_Response node.
har_entries = har_parser.har_data['entries']
# count = 1
# NOTE(review): the range stops before index 0, so the first entry is never
# processed -- confirm this is intended.
for j in range(len(har_entries) - 1, 0, -1):
    request = har_entries[j]['request']
    if j in index_list or request['method'] != "GET":
        continue

    # `text` is optional in HAR content objects.
    y_resContent = str(har_entries[j]['response']['content'].get('text', ''))
    print(request['url'])
    soup = BeautifulSoup(y_resContent, 'html.parser')

    # One "name:value" string per attribute; multi-valued attributes (lists)
    # contribute one string per item.
    list_n1 = []
    for tag in soup.find_all():
        for tg, attr_value in tag.attrs.items():
            if isinstance(attr_value, list):
                list_n1.extend(tg + ":" + item for item in attr_value)
            else:
                list_n1.append(tg + ":" + attr_value)

    response_node = Node('GET_Response', name='Response', req_no=j)
    graph.create(Relationship(request_node_dict.get("Request" + "_" + str(j)),
                              "hasResponse", response_node))

    # NOTE(review): att_order starts at 2 because the source incremented the
    # counter before its first use -- kept as is, confirm it is intended.
    for attr_count, temp in enumerate(list_n1, start=2):
        # NOTE(review): splitting on the first ':' mis-splits attribute names
        # that contain a colon themselves (e.g. xml:lang); kept because the
        # keys of value_node_dict are derived the same way.
        attr_key, _, attr_value = str(temp).partition(':')
        if attr_value in value_node_dict:
            val_node = value_node_dict.get(attr_value)
        else:
            val_node = Node('attr_values', name=attr_value)
        graph.create(Relationship(response_node, attr_key, val_node, att_order=attr_count))
Revise this Paste