# no-more-spotify/nms.py


import csv
from pathlib import Path
from pprint import PrettyPrinter

from urllib.parse import urlencode
from urllib.request import urlopen
from urllib.request import Request

import time
from bs4 import BeautifulSoup
import re
import json

pprint = PrettyPrinter().pprint
DIR = Path("spotify_playlists")


def load_playlists(directory):
    """Collect the songs from every ``*.csv`` playlist in *directory*.

    The first row of each file is treated as a header and skipped.  For every
    other row the first column is used as the song id and the remaining
    columns are stored as that song's data.  When the same id shows up more
    than once (within one file or across files) the first occurrence wins.

    Args:
        directory: ``pathlib.Path`` of the folder holding the playlist CSVs.

    Returns:
        dict mapping song id -> list of the remaining column values.
    """
    songs = {}
    for playlist in directory.glob("*.csv"):
        # newline="" hands newline handling to the csv module, so quoted
        # fields containing line breaks are read back unchanged.
        with playlist.open(encoding="utf-8", newline="") as fobj:
            reader = csv.reader(fobj)
            next(reader, None)  # skip the header row (no-op on an empty file)
            for row in reader:
                if not row:
                    # Blank lines are returned as [] and have no id column.
                    continue
                songs.setdefault(row[0], row[1:])
    return songs


# Song id -> remaining CSV columns, merged over all playlists in DIR.
DB = load_playlists(DIR)

# Everything currently present in the download folder.  The directory is
# listed once and the bare file names are extracted once, instead of
# re-reading ``.name`` for every (song, file) pair.
d = list(Path("Z:/nms").glob("*"))
downloaded_names = [entry.name for entry in d]

# For each known song, report it as present when its artist string occurs in
# any downloaded file name; otherwise append a "ytsearch1:" query line to
# not-found.txt.  The file is only written, so plain "w" is sufficient.
with open("not-found.txt", "w", encoding="utf-8") as fobj:
    for song_data in DB.values():
        # Column 0 is the song name and column 2 the artist (the same
        # indices the commented-out query builder in this file uses).
        title, artist = song_data[0], song_data[2]

        # An empty artist is a substring of every name and would mark the
        # song as found no matter what is on disk, so require a real value.
        if artist and any(artist in name for name in downloaded_names):
            print("OK {}".format(title))
        else:
            print("ytsearch1:{} {}".format(title, artist), file=fobj)


# pprint(DB)

# with open("list.txt", "w+", encoding="utf-8") as fobj:
#     for song_id, song_data in DB.items():
#         song_name = song_data[0]
#         song_artist = song_data[2]
#         query = f"ytsearch1:{song_name} {song_artist}"
#         print(query, file=fobj)

        # url = "https://youtube.com/results?{}".format(urlencode({"search_query": query}))
        # req = Request(url, headers={"Connection-Type": "close", "User-Agent": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6P Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36"})
        # resp = urlopen(req)

        # if resp.status == 200:
        #     soup = BeautifulSoup(resp.read().decode("utf-8"))
        #     print(soup.prettify())
        #     import pdb; pdb.set_trace()
        #     for tag in soup.find_all("a"):
        #         print(tag["href"])
        #         if re.search(r"watch", tag["href"]):
        #             DB[song_id].append(tag["href"])
        #             print("Found song: {}".format(tag["href"]))
        #             break
        #     else:
        #         print("no URL found")

        # time.sleep(1)

# with open("output.json", "w+", encoding="utf-8") as fobj:
#     json.dump(DB, fobj)