Welcome, guest! Login / Register - Why register?
Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)

Paste

Pasted as Python by Mohammad ( 7 years ago )
from difflib import SequenceMatcher
import json
from haralyzer import HarParser, HarPage
import diffcomp
import os
import csv
from bs4 import BeautifulSoup
from requests_toolbelt.multipart import decoder
import requests.structures

from py2neo import Graph as grph
from py2neo import Node, Relationship

# Handle to the local Neo4j instance; every node and relationship built by
# this script is written through it.
graph = grph("bolt://localhost:7687", user="neo4j", password="test")
# graph = grph("bolt://localhost:7688", user="neo4j", password="pass")

# Parse the recorded HAR capture once; the script walks its entries several
# times further down.
with open('dumpelgg.har', 'r') as har_file:
    har_parser = HarParser(json.load(har_file))

# Parameter names that identify credential / identity fields.
# This used to be written as a chain of `or`s, which evaluates to the first
# truthy operand only (just 'username'); a tuple keeps all of the names.
# Not referenced anywhere else in this script.
ignore = ('username', 'password', 'author', 'email', 'e-mail', 'login')

# --- State carried from one HAR entry to the next in the main loop ---------
param_names_formatted = []   # cleaned parameter names of the pending request
param_values_formatted = []  # cleaned parameter values of the pending request
post_req_url = ''            # URL of the latest request that carried postData
y_reURL_check = ''           # redirectURL of the previously visited entry
y_param_dict = {}            # raw parameters of the pending request (becomes a list of dicts)
request_number = ''          # 'Request_<i>' key of the latest processed submission
g_req = 1                    # incremented once per processed submission

# --- Collections filled while walking the capture --------------------------
y_url_list = []              # (referer, response body) pairs
y_compare_content = dict()   # URL -> list of response bodies seen for it
param_total = {}             # post URL -> list of text taken from the third parameter value
param_total_graph = {}       # post URL -> list of cleaned parameter value lists
node_dict = {}               # parameter value -> graph node representing it
node_response = {}           # 'Request_<i>' -> response node
param_node_li = []           # Param_value nodes, one per processed submission
request_node_dict = {}       # 'Request_<i>' -> request node
do_dict = {}                 # 'Request_<i>' -> Data_object node
do_req_dict = {}             # 'Request_<i>' -> data-object counter value

# --- Containers handed to diffcomp.compare ---------------------------------
# presumably populated inside diffcomp; verify against that module
y_tag_compare = dict()
dict_action = dict()  # can be removed
dict_list = dict()
dict_dict_list = dict()
dict_l = dict()
list_final = []

# --- Defined but never read in this script ---------------------------------
temp = 0
datardf = ''
check = dict()
cn = 0

# Start each run from a clean slate: remove artefacts left by a previous run.
csv_file = 'output.csv'
for stale_path in ('result.txt', csv_file):
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# Write the CSV header row.  The file is opened through `csv_file` (instead
# of repeating the literal) and with newline='' as the csv module requires,
# so that no extra blank row is emitted on platforms that translate '\n'.
with open(csv_file, 'a', newline='') as output:
    writer = csv.writer(output)
    writer.writerow(['User_Data', 'Meta_Data'])

def _strip_blank_lines(text):
    """Return *text* stripped, with its whitespace-only lines removed."""
    return "".join([s for s in text.strip().splitlines(True) if s.strip("\r\n").strip()])


do_count = 0
# First pass over the capture.  Every entry gets a request node and a URL
# node.  An entry whose URL is the redirect target of the previous entry, and
# which follows a request that carried parameters, is additionally recorded as
# the response to that submission: parameter nodes, a data-object node and a
# response node are created and the content is handed to diffcomp.compare.
for i, entry in enumerate(har_parser.har_data['entries']):
    bool_param = False

    # Round-trip through JSON to work on an independent copy of the entry.
    y = json.loads(json.dumps(entry))
    y_url = y['request']['url']
    y_referrer = y['request']['headers']
    y_reURL = y['response']['redirectURL']
    y_resContent = y['response']['content']['text']

    if y_url not in y_compare_content:
        y_compare_content[y_url] = [y_resContent]

    request_key = 'Request' + '_' + str(i)
    request_node = Node('Request_Node', name='Request', req_no=i)
    request_node_dict[request_key] = request_node

    request_url = Node('Request_URL', name=y_url)
    graph.create(Relationship(request_node, "hasURL", request_url))

    # The fragment (the part after '#') is not sent to the server, so it is
    # dropped before comparing the previous redirect target with this URL.
    follows_redirect = y_reURL_check.partition('#')[0] == y_url
    # Both names have to be tested explicitly: the former
    # `'username' and 'password' not in ...` only ever tested 'password'.
    has_credentials = ('username' in param_names_formatted
                       or 'password' in param_names_formatted)
    # todo Find a way to ignore the unwanted params
    if follows_redirect and param_values_formatted and not has_credentials:
        for header in y_referrer:
            if header.get('name') != 'Referer':
                continue
            referer = header.get('value')
            y_url_list.append((referer, y_resContent))
            if y_reURL:
                # Still inside a redirect chain; wait for the final response.
                continue

            param_node = Node('Param_value', name=param_values_formatted)
            param_object_node = Node("Param_object", name="Params", param_no=i)
            do_count = do_count + 1
            do_req_dict[request_key] = do_count

            data_object_node = Node("Data_object", name="Data object", do_no=do_count)
            if request_key not in do_dict:
                do_dict[request_key] = data_object_node

            param_node_li.append(param_node)
            graph.create(Relationship(request_node, "hasParam", param_object_node))
            for param in y_param_dict:
                param_value_node = Node('param_node_values', name=param["value"])
                print(param_value_node)

                # Reuse the node of a value that was seen before so equal
                # values share one node; otherwise register the new node.
                shared_node = node_dict.setdefault(param["value"], param_value_node)
                graph.create(Relationship(param_object_node,
                                          param["name"],
                                          shared_node))
                graph.create(Relationship(data_object_node,
                                          'data-level',
                                          shared_node, data_object=do_count))

            response_node = Node('Node_response', name='Response', resno=i)
            request_number = request_key
            node_response[request_number] = response_node
            graph.create(Relationship(request_node, "hasResponse", response_node))
            # (The "hasParam" relationship was already created above; the
            # second, identical create call that used to follow was dropped.)

            # Remember this response body under the referring page.
            y_compare_content.setdefault(referer, []).append(y_resContent)

            # Check difference and compare
            diffcomp.compare(referer, param_values_formatted,
                             y_compare_content, y_tag_compare, post_req_url, dict_list, dict_dict_list,
                             y_param_dict, dict_action, dict_l, list_final, request_number)

            # NOTE(review): index 2 assumes at least three parameters were
            # submitted; presumably the third one is the user content -- confirm.
            soup_param = BeautifulSoup(param_values_formatted[2], 'html.parser')
            param_total.setdefault(post_req_url, []).append(soup_param.text.replace(" ", "_"))
            param_total_graph.setdefault(post_req_url, []).append(param_values_formatted)
            g_req += 1

    # Pending parameters survive only while a redirect chain is in progress.
    if not y_reURL:
        param_names_formatted = []
        param_values_formatted = []
        y_param_dict = {}

    y_param = {}
    post_data = y['request'].get('postData')
    if post_data is not None and 'params' in post_data:
        y_param = post_data['params']
        if not y_param:
            # No pre-parsed params: recover them from the raw body text.
            param_text = post_data['text']
            if post_data['mimeType'] == "application/json;charset=utf-8":
                d = json.loads(param_text)
                params = [{"name": dk, "value": str(d[dk])} for dk in d]
            else:
                param_text_byte = str.encode(param_text)
                # The first body line is the boundary delimiter; rebuild the
                # boundary from it without the two leading dashes (assumes the
                # boundary token itself contains no dash after its dash prefix).
                get_boundary_value = str(param_text).partition('\r\n')[0]
                get_boundary_value = (len(get_boundary_value.split('-'))-3)*'-' + get_boundary_value.split('-')[-1]
                content_type = "multipart/form-data; boundary=%s" % (get_boundary_value)

                mpd = decoder.MultipartDecoder(param_text_byte, content_type)

                params = []
                for part in mpd.parts:
                    part_headers = requests.structures.CaseInsensitiveDict(part.headers)
                    for k in part_headers.keys():
                        header_text = part_headers.get(k).decode("utf-8")
                        # NOTE(review): whatever follows the first '=' is used
                        # verbatim as the name, including any quotes.
                        a = header_text.split('=')[1]
                        b = part.content.decode("utf-8")
                        params.append(
                            {"name": a, "value": b}
                        )
            y_param = params

        post_req_url = y['request']['url']
        bool_param = True
        y_param_dict = y_param

    param_names = []
    param_values = []
    if bool_param:
        param_names = [p.get('name') for p in y_param]
        param_values = [p.get('value') for p in y_param]

    # Appended (not replaced): during a redirect chain the lists keep what
    # was collected for the earlier requests of that chain.
    param_names_formatted.extend(_strip_blank_lines(pn) for pn in param_names)
    param_values_formatted.extend(_strip_blank_lines(pv) for pv in param_values)

    y_reURL_check = y_reURL

# Condensed view of dict_dict_list: for every key, a list of
# (first field of the entry, [first item of each element of its second field]).
# presumably the first field is the request key and the items are tags -- verify in diffcomp
dict_dict_l = {}

for url_key in dict_dict_list:
    print(url_key)
    for recorded in dict_dict_list.get(url_key):
        leading_items = [element[0] for element in recorded[1]]
        dict_dict_l.setdefault(url_key, []).append((recorded[0], leading_items))

# For every key, reduce the tag lists of all its entries to one common run:
# start with the first list and repeatedly keep only the longest contiguous
# block it shares with the next list.  The result is stored as a one-element
# list in test_dict[key].
common_dom = []
test_dict = {}
for key, tagged_entries in dict_dict_l.items():
    print(key)
    if not tagged_entries:
        continue

    common_list = [tagged_entries[0][1]]
    for tagged in tagged_entries[1:]:
        candidate = tagged[1]
        lss = SequenceMatcher(None, common_list[0], candidate).find_longest_match(
            0, len(common_list[0]), 0, len(candidate))
        common_list = [common_list[0][lss.a:lss.a + lss.size]]

    # Stored for single-entry keys as well (the lone list is its own common
    # run); previously such keys were left out of test_dict and the lookup
    # test_dict[key] further down raised KeyError.
    common_dom = common_list
    test_dict[key] = common_dom

# Locate, inside each individual tag list, the longest block it shares with
# the common run stored for its key; one Match object per entry, in order.
matches = {}

for key, tagged_entries in dict_dict_l.items():
    print(key)
    for tagged in tagged_entries:
        own_tags = tagged[1]
        shared_run = test_dict[key][0]
        matcher = SequenceMatcher(None, own_tags, shared_run)
        block = matcher.find_longest_match(0, len(own_tags), 0, len(shared_run))
        print(block)
        matches.setdefault(key, []).append(block)
print("\n")
# Cut every recorded entry down to the slice selected by its match (the
# match's start and size in the entry's own list).
filtered_list = []
fl_dict = {}
i = 0


for key, recorded_entries in dict_dict_list.items():
    print(key)
    for lf, match in zip(recorded_entries, matches.get(key)):
        start = match.a
        stop = match.a + match.size
        filtered_list.append(lf[1][start:stop])
        fl_dict.setdefault(key, []).append((lf[0], lf[1][start:stop]))
        print(fl_dict)
        print([lf[1][start:stop]])
        i += 1

# Flatten the kept slices into "name:value" strings.  Within each element of
# a slice only members that are exactly tuples contribute, and their first
# item is read as a mapping of names to values.
fl_sub = []
final = []
final_dict = {}
for k, kept_entries in fl_dict.items():
    for val in kept_entries:
        attr_pairs = []
        for element in val[1]:
            for member in element:
                if type(member) is not tuple:
                    continue
                mapping = member[0]
                attr_pairs.extend(name + ':' + str(mapping[name]) for name in mapping)
        final.append(attr_pairs)
        final_dict.setdefault(k, []).append((val[0], attr_pairs))

from collections import Counter
from itertools import chain

# Per key, keep for every entry only the "name:value" strings that occur in
# exactly one entry of that key (an entry's own repeats count once).
# final_dt maps key -> list of ([first field of the entry], unique strings).
final = []
final_dt = {}
req = []
for k, entries in final_dict.items():
    attr_lists = [entry[1] for entry in entries]
    wrapped_ids = [[entry[0]] for entry in entries]
    counts = Counter(chain.from_iterable(set(attrs) for attrs in attr_lists))
    for wrapped_id, attrs in zip(wrapped_ids, attr_lists):
        unique_attrs = [attr for attr in attrs if counts[attr] == 1]
        final_dt.setdefault(k, []).append((wrapped_id, unique_attrs))

# todo remove
# counts = Counter(chain(*map(set, final)))
# ul = [[i for i in sublist if counts[i] == 1] for sublist in final]

# for u in ul:
#     print(u)
# print("\n")

i = 0
meta_dict = {}        # attribute value -> every attr_values node built for it

local_dom_list = []   # every (request ids, unique attributes) pair visited
value_node_dict = {}  # attribute value -> node, for values not already in node_dict
att_val_l = []
data_level_c = 0
# Attach each unique response attribute to the response node and to the data
# object of the request it belongs to.  A value that was already submitted as
# a parameter reuses that parameter's node from node_dict.
for k in final_dt:
    for val in final_dt.get(k):
        print(val)
        local_dom_list.append(val)
        request_key = val[0][0]

        for attr in val[1]:
            attr_name, sep, att_val = str(attr).partition(':')
            # Built but never written to the graph.
            param_node = Node('Param_values', name=val)
            value_node = Node("attr_values", name=att_val)
            meta_dict.setdefault(att_val, []).append(value_node)

            if att_val in node_dict:
                target_node = node_dict.get(att_val)
            else:
                value_node_dict[att_val] = value_node
                target_node = value_node

            graph.create(Relationship(node_response.get(request_key),
                                      attr_name,
                                      target_node))
            graph.create(Relationship(do_dict.get(request_key),
                                      'data-level',
                                      target_node,
                                      data_object=do_req_dict.get(request_key)))


# GET Requests (after Insertion Request)
# Collect the index of every entry whose URL equals the previous entry's
# redirect target (fragment removed).
index_list = []
y_reURL_check = ''
for i, raw_entry in enumerate(har_parser.har_data['entries']):
    y = json.loads(json.dumps(raw_entry))
    y_url = y['request']['url']
    y_referrer = y['request']['headers']
    y_reURL = y['response']['redirectURL']
    y_resContent = y['response']['content']['text']

    previous_target = y_reURL_check.partition('#')[0]
    if previous_target == y_url:
        index_list.append(i)

    y_reURL_check = y_reURL

get_req_test = Node('get_request', name="get_req_test")

get_url = ''
# Walk the capture backwards and, for every GET entry that is not a redirect
# target, add a response node plus one relationship per HTML attribute value
# found in the response body.
redirect_targets = set(index_list)  # set membership instead of a list scan per entry
# NOTE(review): the range stops before index 0, so the first entry is never
# visited -- confirm whether that is intended.
for j in range(len(har_parser.har_data['entries'])-1, 0, -1):
    y = json.loads(json.dumps(har_parser.har_data['entries'][j]))
    y_url = y['request']['url']
    y_referrer = y['request']['headers']

    if j in redirect_targets or y['request']['method'] != "GET":
        continue

    y_resContent = str(y['response']['content']['text'])

    print(y['request']['url'])
    soup = BeautifulSoup(y_resContent, 'html.parser')

    # "name:value" strings in document order; an attribute holding several
    # values contributes one string per value.
    list_n1 = []
    for tag in soup.find_all():
        for tg, attr_value in tag.attrs.items():
            # isinstance rather than an exact type check, so that a list
            # subclass (newer Beautiful Soup releases may return one for
            # multi-valued attributes) is expanded too instead of failing on
            # str + list below.
            if isinstance(attr_value, list):
                list_n1.extend(tg + ":" + item for item in attr_value)
            else:
                list_n1.append(tg + ":" + attr_value)

    response_node = Node('GET_Response', name='Response', req_no=j)

    graph.create(Relationship(request_node_dict.get("Request"+"_"+str(j)), "hasResponse", response_node))

    # att_order is incremented before use, so the first attribute gets 2.
    attr_count = 1
    for pair in list_n1:
        attr_key, _sep, attr_text = str(pair).partition(':')
        attr_count = attr_count + 1
        if attr_text in value_node_dict:
            # Value already known from a submission response: link to its node.
            graph.create(Relationship(response_node, attr_key,
                                      value_node_dict.get(attr_text), att_order=attr_count))
        else:
            val_node = Node('attr_values', name=attr_text)
            graph.create(Relationship(response_node, attr_key, val_node, att_order=attr_count))

 

Revise this Paste

Your Name: Code Language: