-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtraverse_github_issues.py
More file actions
93 lines (63 loc) · 2.53 KB
/
Copy pathtraverse_github_issues.py
File metadata and controls
93 lines (63 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import requests
from utils import parser
from tqdm import tqdm
import pandas as pd
import os
from dotenv import load_dotenv
from pprint import pprint
# Pull variables from a local .env file (if present) into the environment so
# the token does not have to be hard-coded in this script.
load_dotenv()

# GitHub access token read from the environment; None when it is not set.
TOKEN_GITHUB = os.getenv('TOKEN_GITHUB')

# Headers sent with every API request. The Authorization entry is added only
# when a token is configured: formatting a missing token would send the literal
# value "token None", i.e. bogus credentials, instead of an anonymous request.
headers = {'Authorization': f'token {TOKEN_GITHUB}'} if TOKEN_GITHUB else {}
def fetch_data(data):
    """Request a URL with the module-level ``headers`` and decode its JSON body.

    Args:
        data: The URL to fetch (parameter name kept for backward
            compatibility with existing callers).

    Returns:
        The decoded JSON payload. The HTTP status is not inspected here, so
        an error response is returned as whatever JSON it carries and the
        caller is responsible for checking its shape.

    Raises:
        requests.exceptions.RequestException: If the request fails or no
            response arrives within the timeout.
        ValueError: If the response body is not valid JSON (requests may
            raise a subclass of it).
    """
    # Without an explicit timeout a stalled connection would block forever.
    response = requests.get(data, headers=headers, timeout=30)
    return response.json()
def parse_comment(data):
    """Drop quoted lines and image embeds from a comment body.

    Args:
        data: The raw body text, or None when there is no body.

    Returns:
        The text with every line removed that, once all of its spaces are
        stripped out, starts with ``>`` or ``![``. The remaining lines are
        kept untouched and re-joined with newlines. None yields "".
    """
    if data is None:
        return ""

    skipped_prefixes = ('>', '![')
    # The prefix test runs on a copy of the line with *all* spaces removed,
    # so indented quotes and spaced-out markers such as "! [" match as well.
    kept_lines = [
        line
        for line in data.split('\n')
        if not line.replace(" ", "").startswith(skipped_prefixes)
    ]
    return '\n'.join(kept_lines)
if __name__ == '__main__':
    # Script entry point: walk every page of open and closed issues for the
    # repository selected by the command-line arguments and write one CSV row
    # per issue.
    args = parser()

    columns = ['title', 'issue_link', 'issue_desc', 'pull_request', 'number_comments', 'comments_link', 'comments',
               'created_at', 'closed_at', 'status']
    statuses = ['open', 'closed']
    # Highest page number requested per status (same bound as range(1, 1000)).
    last_page = 999

    # Rows are collected in a plain list and converted to a DataFrame once at
    # the end; inserting into the DataFrame row by row re-grows it every time.
    rows = []

    # Context managers make sure both progress bars are closed even when a
    # request or a malformed response raises part-way through.
    with tqdm(total=len(statuses), desc="Fetching data") as pbar:  # <open|closed>
        for status in statuses:
            with tqdm(desc="Iterating pages...", leave=False) as pbar_page:
                for page in range(1, last_page + 1):
                    url = "https://api.{}/issues?state={}&page={}".format(args.GITHUB, status, page)
                    data = fetch_data(url)

                    # An empty payload means we have moved past the last page.
                    if not data:
                        break

                    # A page of issues is a JSON array. Anything else (for
                    # example an error object) cannot be iterated as issues,
                    # so fail with the payload instead of an opaque TypeError.
                    if not isinstance(data, list):
                        raise RuntimeError(
                            "Unexpected response from {}: {!r}".format(url, data)
                        )

                    # Diagnostic output kept from the original script: dump
                    # pages that hold fewer than five entries.
                    if len(data) < 5:
                        pprint(data)

                    pbar_page.update(1)

                    for issue in data:
                        # No comment bodies are fetched here, so this is
                        # always ""; "\n$$\n" is the separator reserved for
                        # joining them.
                        comments = "\n$$\n".join([])

                        rows.append({
                            'title': issue['title'],
                            'issue_link': issue['html_url'],
                            'issue_desc': parse_comment(issue['body']),
                            # The key is only present on entries that are
                            # pull requests.
                            'pull_request': 'pull_request' in issue,
                            'number_comments': int(issue['comments']),
                            'comments_link': issue['comments_url'],
                            'comments': comments,
                            'created_at': issue['created_at'],
                            'closed_at': issue['closed_at'],
                            'status': status,
                        })
            pbar.update(1)

    # Passing `columns` keeps the header (and its order) even when no issue
    # was found at all.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv("{}/{}_github.csv".format(args.path_logs, args.path), index=False)