-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearch.py
More file actions
179 lines (142 loc) · 7.31 KB
/
search.py
File metadata and controls
179 lines (142 loc) · 7.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import traceback
import sys
import state
from comments import get_videos_comments_and_commenters
from channels import get_videos_and_videocreators
from network import export_network_file
from videos import create_video_metadata
from utils import get_filename
from utils import export_dict_to_excel
#*****************************************************************************************************
#This function executes a search on YouTube (similar to the one in the YouTube search bar) to retrieve
#the videos that match the query given as a parameter.
#The search is based on relevance to the query
#*****************************************************************************************************
def get_videos_id_by_query(youtube, query, numberVideosToRetrieve):
    """Search YouTube for videos matching ``query`` and collect their ids.

    Results are requested ordered by relevance, one page at a time, until at
    least ``numberVideosToRetrieve`` ids have been collected, the API reports
    no further page, or ``state.under_quote_limit`` refuses another search
    request.

    Args:
        youtube: Client object exposing ``search().list(...).execute()``.
        query: Search text sent as the ``q`` parameter.
        numberVideosToRetrieve: Number of video ids wanted. Whole pages are
            appended, so the result may contain more ids than requested.

    Returns:
        list: Video ids in the order the API returned them. The list may be
        empty or partial (quota refused, or a request failed). Items without
        a ``videoId`` contribute the placeholder ``"N/A"``.
    """
    videos_ids = []

    # Nothing was asked for: do not spend quota on a request at all.
    if numberVideosToRetrieve <= 0:
        return videos_ids

    # Never ask for a bigger page than the caller needs.
    maxResults = min(state.MAX_SEARCH_RESULTS_PER_REQUEST, numberVideosToRetrieve)
    nextPageToken = None

    try:
        while True:
            # Stop paging when the quota check fails, but keep what was
            # already collected: callers always expect a list back.
            if not state.under_quote_limit(state.state_yt, state.UNITS_SEARCH_LIST):
                break

            search_request = youtube.search().list(
                part="snippet",
                q=query,
                type="video",
                maxResults=maxResults,
                order="relevance",
                pageToken=nextPageToken
            )
            # Usage is recorded before the request is executed.
            state.state_yt = state.update_quote_usage(state.state_yt, state.UNITS_SEARCH_LIST)
            search_response = search_request.execute()

            # Obtain video_id for each video in the response
            for item in search_response['items']:
                videos_ids.append(item["id"].get("videoId", "N/A"))

            nextPageToken = search_response.get('nextPageToken')
            if not nextPageToken or len(videos_ids) >= numberVideosToRetrieve:
                break
    except Exception:
        # Best effort: report the failure and return whatever was gathered.
        print("Error on get_videos_id_by_query")
        print(sys.exc_info()[0])
        traceback.print_exc()

    return videos_ids
#*****************************************************************************************************
#This function searches YouTube for the videos that match a query.
#It exports the metadata of those videos to an Excel file.
#*****************************************************************************************************
def get_videos_by_keyword_metadata(youtube, query, maxPages=2):
    """Search YouTube for ``query`` and export the videos' metadata to Excel.

    For each page of search results (ordered by relevance) the video ids are
    collected, the full video resources are requested in one
    ``videos().list`` call, and each one is converted with
    ``create_video_metadata``. The records gathered so far are exported even
    if a request fails part-way through.

    Args:
        youtube: Client object exposing ``search().list(...)`` and
            ``videos().list(...)``.
        query: Search text sent as the ``q`` parameter.
        maxPages: Maximum number of search result pages to process. At least
            one page is always requested. Defaults to 2, the limit that was
            previously hard-coded.

    Returns:
        None. The records are written through ``export_dict_to_excel`` and
        the generated filename is printed.
    """
    records = {}
    nextPageToken = None
    count = 1
    pagesFetched = 0

    try:
        while True:
            # Same quota guard that get_videos_id_by_query applies before
            # each search request.
            if not state.under_quote_limit(state.state_yt, state.UNITS_SEARCH_LIST):
                break

            search_request = youtube.search().list(
                part="snippet",
                q=query,
                type="video",
                maxResults=state.MAX_SEARCH_RESULTS_PER_REQUEST,
                order="relevance",
                pageToken=nextPageToken
            )
            state.state_yt = state.update_quote_usage(state.state_yt, state.UNITS_SEARCH_LIST)
            search_response = search_request.execute()

            # Obtain video_id for each video in the response
            videos_ids = [
                item["id"].get("videoId", "N/A")
                for item in search_response['items']
            ]

            # An empty page has nothing to look up; skip the videos request
            # instead of sending an empty id list.
            if videos_ids:
                # Request all videos of this page in a single call
                videos_request = youtube.videos().list(
                    part="contentDetails,snippet,statistics",
                    id=','.join(videos_ids)
                )
                state.state_yt = state.update_quote_usage(state.state_yt, state.UNITS_VIDEOS_LIST)
                videos_response = videos_request.execute()

                for item in videos_response['items']:
                    metadata = create_video_metadata(item)
                    print('{} - Video {}'.format(count, item["id"]))
                    records[count] = metadata
                    count = count + 1

            nextPageToken = search_response.get('nextPageToken')
            pagesFetched += 1
            if not nextPageToken or pagesFetched >= maxPages:
                break
    except Exception:
        # Best effort: report the failure and still export what was gathered.
        print("Error on get_videos_by_keyword_metadata ")
        print(sys.exc_info()[0])
        traceback.print_exc()

    # Export info to excel
    filename = get_filename('videos_query_metadata', 'xlsx')
    export_dict_to_excel(records, 'output', filename)
    print("Output is in: " + filename)
#*****************************************************************************************************
#This function searches videos on youtube that match a query (by relevance)
#The argument network specifies if the whole network will be built (videos and parameters).
# To build a network, videos, creators, comments and commenters are needed.
#If the network parameter is None, only the videos and their creators will be retrieved.
#The maxNumberVideos given as parameter is optional. The default is 100. This parameter should be
#a multiple of 50.
#*****************************************************************************************************
def search_videos_youtube(youtube, query, maxNumberVideos=None, network=None, interactive=True):
    """Run a YouTube search and retrieve metadata for the matching videos.

    The ids returned by ``get_videos_id_by_query`` are de-duplicated and
    passed to ``get_videos_and_videocreators``. When ``network`` is truthy,
    comments and commenters are retrieved too, and a network file is exported
    once both the videos and the comments are flagged as fully retrieved in
    ``state.state_yt``.

    Args:
        youtube: Client object forwarded to the retrieval helpers.
        query: Search text; also used to build the output name prefixes.
        maxNumberVideos: Number of videos to search for. When missing,
            negative, or above what ``state.number_of_items_with_quote``
            allows, it is replaced by the smaller of
            ``state.DEFAULT_VIDEOS_TO_RETRIEVE`` and that allowance.
        network: Truthy to also retrieve comments and build the network file.
        interactive: When true, ask for confirmation on stdin before spending
            quota. Any answer other than "Y"/"y" ends the program through
            ``sys.exit()``.

    Returns:
        None.
    """
    print ("Executing query/YouTube search ")

    videos_with_quote = state.number_of_items_with_quote(state.UNITS_SEARCH_LIST, state.MAX_SEARCH_RESULTS_PER_REQUEST)
    if (not maxNumberVideos) or (maxNumberVideos < 0) or (maxNumberVideos > videos_with_quote):
        # Fall back to the default, capped by what the quota still allows.
        maxNumberVideos = min(state.DEFAULT_VIDEOS_TO_RETRIEVE, videos_with_quote)

    if interactive:
        cost = state.total_requests_cost(maxNumberVideos, state.MAX_SEARCH_RESULTS_PER_REQUEST, state.UNITS_SEARCH_LIST)
        proceed = input ("Search {} videos for query \"{}\" with a cost of {} units [Y/N]? ".format(maxNumberVideos, query, cost))
        # Tolerate stray whitespace around the answer.
        if proceed.strip().upper() != "Y":
            sys.exit()

    found_ids = get_videos_id_by_query(youtube, query, maxNumberVideos)
    # The search helper can hand back None when the quota check fails, and a
    # set would scramble the relevance ranking. De-duplicate while keeping
    # first-seen order instead.
    videos_ids = list(dict.fromkeys(found_ids or []))
    if not videos_ids:
        return

    state.add_action(state.state_yt, state.ACTION_RETRIEVE_VIDEOS)
    if network:
        state.add_action(state.state_yt, state.ACTION_RETRIEVE_COMMENTS)
        state.add_action(state.state_yt, state.ACTION_CREATE_NETWORK)

    print ('Getting video and creators metadata ')
    videos_records = get_videos_and_videocreators(youtube, videos_ids, "search_" + query + "_videos_creators")

    if not network:
        return

    print ('Getting comments and commenters metadata ')
    comments_records = get_videos_comments_and_commenters(youtube, videos_ids, "search_" + query + "_comments_commenters")

    # The network is exported ONLY WHEN ALL THE VIDEOS AND COMMENTS HAVE BEEN RETRIEVED
    if not (videos_records and comments_records):
        return
    if state.state_yt[state.ALL_VIDEOS_RETRIEVED] and state.state_yt[state.ALL_COMMENTS_RETRIEVED]:
        print ('Exporting network file ')
        output_file = export_network_file("search_" + query, videos_records=videos_records, comments_records=comments_records)
        if output_file:
            print("Output is in :" + output_file)