import csv
import json
import re
import time
from pathlib import Path
from pprint import PrettyPrinter
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup

pprint = PrettyPrinter().pprint

DIR = Path("spotify_playlists")
DB = {}

# Build a de-duplicated song database from the exported playlist CSVs,
# keyed by the track id in the first column.
for playlist in DIR.glob("*.csv"):
    with playlist.open(encoding="utf-8") as fobj:
        reader = csv.reader(fobj)
        for lineno, line in enumerate(reader):
            if lineno == 0:
                # Skip the header row.
                continue
            _id = line[0]
            if _id not in DB:
                DB[_id] = line[1:]

# Files already present in the local music directory.
downloaded = list(Path("Z:/nms").glob("*"))

# For every track, check whether a matching file already exists; if not,
# write a "ytsearch1:<title> <artist>" query to not-found.txt.
with open("not-found.txt", "w+", encoding="utf-8") as fobj:
    for song_id, song_data in DB.items():
        for _file in downloaded:
            if song_data[2] in _file.name:
                print("OK {}".format(song_data[0]))
                break
        else:
            print("ytsearch1:{} {}".format(song_data[0], song_data[2]), file=fobj)

# pprint(DB)

# Commented-out approach: scrape the YouTube search results page directly,
# store the first /watch link for each track in DB, then dump everything to JSON.
# with open("list.txt", "w+", encoding="utf-8") as fobj:
#     for song_id, song_data in DB.items():
#         song_name = song_data[0]
#         song_artist = song_data[2]
#         query = f"ytsearch1:{song_name} {song_artist}"
#         print(query, file=fobj)
#         url = "https://youtube.com/results?{}".format(urlencode({"search_query": query}))
#         req = Request(url, headers={
#             "Connection": "close",
#             "User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6P Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36",
#         })
#         resp = urlopen(req)
#         if resp.status == 200:
#             soup = BeautifulSoup(resp.read().decode("utf-8"), "html.parser")
#             print(soup.prettify())
#             import pdb; pdb.set_trace()
#             for tag in soup.find_all("a"):
#                 print(tag["href"])
#                 if re.search(r"watch", tag["href"]):
#                     DB[song_id].append(tag["href"])
#                     print("Found song: {}".format(tag["href"]))
#                     break
#             else:
#                 print("no URL found")
#         time.sleep(1)

# with open("output.json", "w+", encoding="utf-8") as fobj:
#     json.dump(DB, fobj)
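
# The "ytsearch1:<title> <artist>" lines written to not-found.txt match the
# search-prefix syntax of yt-dlp / youtube-dl, so the missing tracks could be
# fetched in batch afterwards. Sketch, assuming yt-dlp is the downloader used
# (the script itself only writes the queries and does not show the download step):
#
# import subprocess
# subprocess.run(
#     ["yt-dlp", "-x", "--audio-format", "mp3", "--batch-file", "not-found.txt"],
#     check=True,
# )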