This model predicts when a given YouTuber will post their next video, based on the waiting times between their previous uploads.
test:
RMSE (root-mean-square error): 3.71 days; MAE (mean absolute error): 2.34 days; R-squared (goodness of fit): 0.42
(I think it's good.)
example code:
import pandas as pd
import numpy as np
import joblib
import datetime
import time
from googleapiclient.discovery import build

# Settings
# Passed as `developerKey` when the YouTube Data API v3 client is built in main().
# NOTE(review): "api_key" looks like a placeholder — replace with a real key.
API_KEY = "api_key"
# Number of most recent upload intervals fed to the model; main() fetches
# INPUT_WINDOW_SIZE + 1 videos to compute them.
INPUT_WINDOW_SIZE = 5


# --- FUNKCJE DO POBIERANIA DANYCH ---

def get_channel_id(youtube, channel_name):
    """Resolve a channel handle to its channel ID.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_name: Channel handle; every "@" character is removed before
            it is sent as the ``forHandle`` query parameter.

    Returns:
        The ``id`` of the first item in the response, or ``None`` when the
        response has no (or an empty) ``items`` list.
    """
    handle = channel_name.replace("@", "")
    response = youtube.channels().list(part="snippet", forHandle=handle).execute()

    items = response['items'] if 'items' in response else None
    if not items:
        return None
    return items[0]['id']


def get_channel_info(youtube, channel_id):
    """Return the subscriber count reported for a channel.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_id: ID of the channel to look up.

    Returns:
        The subscriber count as an ``int``. Returns 0 when the response
        contains no items, or when the channel's statistics do not include
        ``subscriberCount``.
    """
    response = youtube.channels().list(part="statistics", id=channel_id).execute()

    items = response.get('items')
    if not items:
        return 0

    # The field can be absent from `statistics` (presumably when the channel
    # hides its subscriber count — see `hiddenSubscriberCount` in the API
    # docs); fall back to 0 like the "channel not found" case instead of
    # raising KeyError.
    statistics = items[0].get('statistics') or {}
    return int(statistics.get('subscriberCount', 0))


def get_video_dates(youtube, channel_id, num_videos):
    """Collect publication timestamps from a channel's uploads playlist.

    Looks up the channel's ``uploads`` playlist, then pages through its
    items until ``num_videos`` timestamps have been collected or there are
    no more pages.

    Args:
        youtube: API client exposing ``channels()`` and ``playlistItems()``.
        channel_id: ID of the channel whose uploads are listed.
        num_videos: Maximum number of timestamps to collect.

    Returns:
        Timezone-aware ``datetime`` objects sorted newest first. The list is
        empty when the channel lookup fails or returns no items, and may be
        shorter than ``num_videos`` when the playlist is short or a page
        request fails part-way through.
    """
    video_dates = []
    try:
        res = youtube.channels().list(id=channel_id, part='contentDetails').execute()
        if not res['items']:
            return video_dates
        playlist_id = res['items'][0]['contentDetails']['relatedPlaylists']['uploads']
    except Exception as e:
        print(f"Błąd API w funkcji get_video_dates: {e}")
        return video_dates

    next_page_token = None
    while len(video_dates) < num_videos:
        # Guard the page request the same way as the channel lookup above, so
        # an API failure mid-pagination is reported instead of crashing the
        # caller; whatever was collected so far is still returned.
        try:
            response = youtube.playlistItems().list(
                playlistId=playlist_id,
                # `snippet` is requested because `publishedAt` is read from it.
                part="contentDetails,snippet",
                maxResults=min(50, num_videos - len(video_dates)),
                pageToken=next_page_token,
            ).execute()
        except Exception as e:
            print(f"Błąd API w funkcji get_video_dates: {e}")
            break

        for item in response.get('items', []):
            published_at_str = item['snippet']['publishedAt']
            # Rewrite the trailing "Z" as an explicit UTC offset so that
            # fromisoformat() accepts it and yields an aware datetime.
            video_dates.append(
                datetime.datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
            )

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        time.sleep(0.1)  # short pause between page requests

    # Newest first.
    return sorted(video_dates, reverse=True)


# --- GŁÓWNA LOGIKA ---

def main():
    """Run the interactive prediction loop.

    Loads the trained regressor and the subscriber-count scaler from
    ``model1/``, builds a YouTube Data API client, then repeatedly asks for
    a channel handle. For each handle it fetches the subscriber count and
    the most recent ``INPUT_WINDOW_SIZE + 1`` upload dates, converts the
    dates into day intervals, and prints the predicted number of days until
    the next upload. Entering ``q`` (or closing stdin) ends the loop.

    Returns early with a message when the model files are missing.
    """
    try:
        mlp = joblib.load('model1/mlp_model.joblib')
        scaler_subs = joblib.load('model1/scaler_subs.joblib')
    except FileNotFoundError:
        print("Błąd: Nie znaleziono plików modelu. Uruchom 'train.py' aby je utworzyć.")
        return

    youtube = build("youtube", "v3", developerKey=API_KEY)
    required_videos = INPUT_WINDOW_SIZE + 1  # N intervals need N + 1 dates
    seconds_per_day = 60 * 60 * 24

    while True:
        try:
            raw_handle = input("\nPodaj nazwę kanału (np. @PewDiePie) lub 'q' aby zakończyć: ")
        except EOFError:
            # stdin was closed (e.g. piped input ran out): treat it as quit.
            break

        # Ignore surrounding whitespace so that "q " still quits and a padded
        # handle is not sent to the API verbatim.
        channel_handle = raw_handle.strip()
        if channel_handle.lower() == 'q':
            break
        if not channel_handle:
            continue  # nothing entered; ask again

        print(f"Pobieranie danych dla kanału {channel_handle}...")

        # Top-level boundary of the interactive session: report an API
        # failure for this channel and keep the loop alive.
        try:
            channel_id = get_channel_id(youtube, channel_handle)
            subscriber_count = get_channel_info(youtube, channel_id) if channel_id else 0
        except Exception as e:
            print(f"Błąd API: {e}")
            continue

        if not channel_id:
            print("Nie udało się znaleźć kanału. Spróbuj innej nazwy.")
            continue

        video_dates = get_video_dates(youtube, channel_id, required_videos)

        if len(video_dates) < required_videos:
            print(
                f"Błąd: Kanał {channel_handle} ma za mało filmów ({len(video_dates)}) do przewidywania. Potrzeba co najmniej {INPUT_WINDOW_SIZE + 1}.")
            continue

        # Gaps in days between consecutive uploads, newest gap first.
        intervals = [
            (newer - older).total_seconds() / seconds_per_day
            for newer, older in zip(
                video_dates[:INPUT_WINDOW_SIZE], video_dates[1:required_videos]
            )
        ]

        # Warn when any gap exceeds 60 days.
        if max(intervals) > 60:
            print("Uwaga: Ostatnie filmy zawierają duże przerwy. Przewidywanie może być mniej dokładne.")

        # Feature vector: log1p of each interval followed by the scaled
        # subscriber count.
        # NOTE(review): presumably this mirrors the preprocessing in train.py — confirm.
        normalized_intervals = [np.log1p(i) for i in intervals]
        normalized_subs = scaler_subs.transform([[subscriber_count]])[0][0]
        features = normalized_intervals + [normalized_subs]

        # The model output is inverted with expm1 to get days.
        predicted_log_interval = mlp.predict([features])
        predicted_interval_days = np.expm1(predicted_log_interval)[0]

        print(f"\n--- Przewidywanie dla {channel_handle} ---")
        print(f"Ostatnie {INPUT_WINDOW_SIZE} odstępów: {[round(i, 2) for i in intervals]} dni")
        print(f"Liczba subskrybentów: {subscriber_count:,}")
        print(f"Model przewiduje, że następny film pojawi się za {predicted_interval_days:.2f} dni.")


# Start the interactive predictor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Downloads last month: -