From 24816edb3365392194a1b22d9dcb76d09d653e15 Mon Sep 17 00:00:00 2001 From: Alexandr Nesterenko Date: Wed, 16 Dec 2015 17:01:45 +0000 Subject: [PATCH] first version of selection calculation is ready --- frontend/frontend/setup_tool.py | 117 +++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 39 deletions(-) diff --git a/frontend/frontend/setup_tool.py b/frontend/frontend/setup_tool.py index f27d212..68cff31 100644 --- a/frontend/frontend/setup_tool.py +++ b/frontend/frontend/setup_tool.py @@ -1,87 +1,128 @@ - +I_TAGNAME = 0 +I_ATTRS = 1 +I_CHILDREN = 2 +I_PARENT = 3 # not in use def build_xpathes(item_tag_ids, html_json): shared_tag_stack = []; -def build_parent_stack(html_json, tag_id): +def _build_parent_stack(html_json, tag_id): tag_stack = [] def walk_by_tag(tag): - if (tag[1]['tag-id'] == tag_id): + if (tag[I_ATTRS]['tag-id'] == tag_id): return True else: - for subtag in tag[2]: + for subtag in tag[I_CHILDREN]: if walk_by_tag(subtag): tag_stack.append(subtag) return True return False walk_by_tag(html_json) + + tag_stack.append(html_json) + return list(reversed(tag_stack)) -def find_tags_by_tag_names(html_json, parent_tag_names): +def _find_tags_by_tag_names(html_json, parent_tag_names): + tags = [] + tag_stack = [] - tag_ids = [] - - def walk_by_tag(tag): - depth = len(tag_ids) - if tag[0] == parent_tag_names[depth]: + def walk_by_tag(tag, depth): + tag_stack.append(tag) + if tag[I_TAGNAME] == parent_tag_names[depth]: if depth == len(parent_tag_names)-1: # is a tie - tag_ids.append(tag[1]['tag-id']) - elif depth < len(parent_tag_names): - for subtag in tag[2]: - walk_by_tag(subtag) - return tag_ids + tags.append((tag, list(tag_stack))) + elif depth < len(parent_tag_names)-1: + for subtag in tag[I_CHILDREN]: + walk_by_tag(subtag, depth+1) + tag_stack.pop() + walk_by_tag(html_json, 0) + return tags # allusion to xpath class PathItem: go_parent = False - go_child_tag = None + child_tag = None child_index = None + def __init__(self, go_parent=False, child_tag=None, child_index=None): + self.go_parent = go_parent + self.child_tag = child_tag + self.child_index = child_index + + def __repr__(self): + return '..' if self.go_parent else '%s[%s]' % (self.child_tag, self.child_index+1) + + def _build_path(stack, target_stack): fork = None for fork_i in xrange(0, len(stack)): if stack[fork_i] == target_stack[fork_i]: fork = stack[fork_i] else: + fork_i -= 1 break - + path = [] + # shifts to parent; like '..' in xpath for i in xrange(fork_i, len(stack)): path.append(PathItem(go_parent=True)) - for i in xrange(fork_i, len(target_stack)-1): + # address by children with indexes; like 'tag[n]' in xpath + for i in xrange(fork_i, len(target_stack)): tag = target_stack[i] - tag_name = tag[0] + tag_name = tag[I_TAGNAME] parent = target_stack[i-1] - children = parent[2] + tags = parent[I_CHILDREN] idx = 0 - for j in xrange(0, len(children)): - if children[j][0] == tag_name: - idx += 1 - if children[j] == tag: + for tag_ in tags: + if tag_[I_TAGNAME] == tag_name: + if tag_ == tag: break - path.append(PathItem(go_child_tag=tag_name, child_index=idx)) + idx += 1 + path.append(PathItem(child_tag=tag_name, child_index=idx)) return path - + +def _find_tag(html_json, source_tag_info, path): + tag = source_tag_info[0] + tag_stack = source_tag_info[1] + stack_i = len(tag_stack)-1 + + for step in path: + if step.go_parent: + stack_i -= 1 + tag = tag_stack[stack_i] + else: + idx = step.child_index + next = None + for child in tag[I_CHILDREN]: + if child[I_TAGNAME] == step.child_tag: + if idx == 0: + next = child + break + idx -= 1 + if next is None: + return None + tag = next + return tag def get_selection_tag_ids(item_tag_ids, html_json): parent_stacks = {} - import pdb; pdb.set_trace() # buld parent stacks for every item name for name in item_tag_ids: tag_id = item_tag_ids[name] - parent_stacks[name] = build_parent_stack(html_json, tag_id) + parent_stacks[name] = _build_parent_stack(html_json, tag_id) # get first item and get his path first_name, parent_stack = parent_stacks.popitem() - parent_tag_names = [tag[0] for tag in parent_stack] + parent_tag_names = [tag[I_TAGNAME] for tag in parent_stack] # find tags for first item - tags = find_tags_by_tag_names(html_json, parent_tag_names) + tags = _find_tags_by_tag_names(html_json, parent_tag_names) # get pathes for another items selection_pathes = {} @@ -89,21 +130,19 @@ def get_selection_tag_ids(item_tag_ids, html_json): selection_pathes[name] = _build_path(parent_stack, parent_stacks[name]) # get selection ids - selection_ids = [name:[] for name in item_tag_ids] - for source_tag in tags: - ids = [] + selection_ids = {name:[] for name in item_tag_ids} + for tag_info in tags: + ids = {} for name in selection_pathes: - tag = _find_tag(html_json, source_tag, selection_path[name]) + tag = _find_tag(html_json, tag_info, selection_pathes[name]) if tag is not None: - ids[name] = tag[T_ATTRS]['tag-id']) - else + ids[name] = tag[I_ATTRS]['tag-id'] + else: ids = None break if ids is not None: - selection_ids[first_name].append(source_tag[T_ATTRS]['tag-id']) + selection_ids[first_name].append(tag_info[0][I_ATTRS]['tag-id']) for name in selection_pathes: selection_ids[name].append(ids[name]) return selection_ids - - return { name: tag_ids }