This model predicts when a given YouTuber will post their next video, based on the wait times between their previous uploads.

test:

RMSE (błąd): 3.71 days; MAE (średni błąd bezwzględny): 2.34 days; R-squared (dopasowanie): 0.42

(I think it's good.)

example code:

import pandas as pd
import numpy as np
import joblib
import datetime
import time
from googleapiclient.discovery import build

# Settings
# YouTube Data API key handed to build() as developerKey in main().
# NOTE(review): "api_key" looks like a placeholder — replace with a real key.
API_KEY = "api_key"
# Number of most recent upload intervals fed to the model as features;
# main() fetches INPUT_WINDOW_SIZE + 1 videos to compute them.
INPUT_WINDOW_SIZE = 5


# --- DATA-FETCHING FUNCTIONS ---

def get_channel_id(youtube, channel_name):
    """Resolve a channel handle to its channel ID.

    Every "@" character is removed from ``channel_name`` before it is sent
    as the ``forHandle`` filter of ``channels().list``.

    Args:
        youtube: YouTube Data API client.
        channel_name: Channel handle, with or without the "@" prefix.

    Returns:
        The ``id`` of the first item in the response, or ``None`` when the
        response contains no items.
    """
    handle = channel_name.replace("@", "")
    response = youtube.channels().list(part="snippet", forHandle=handle).execute()

    matches = response.get('items')
    if not matches:
        return None
    return matches[0]['id']


def get_channel_info(youtube, channel_id):
    """Return the subscriber count reported for a channel.

    Args:
        youtube: YouTube Data API client.
        channel_id: ID of the channel to query.

    Returns:
        The subscriber count as an ``int``. Returns 0 when the response has
        no items or when the statistics carry no ``subscriberCount``.
    """
    response = youtube.channels().list(part="statistics", id=channel_id).execute()

    items = response.get('items')
    if not items:
        return 0

    statistics = items[0].get('statistics', {})
    # subscriberCount may be missing (e.g. for channels that hide their
    # subscriber count); fall back to 0, the same value used when the channel
    # is not found, instead of raising KeyError.
    return int(statistics.get('subscriberCount', 0))


def get_video_dates(youtube, channel_id, num_videos):
    """Collect publication timestamps from a channel's uploads playlist.

    Looks up the channel's "uploads" playlist, then pages through it until
    ``num_videos`` timestamps have been collected or no further page exists.

    Args:
        youtube: YouTube Data API client.
        channel_id: ID of the channel whose uploads are listed.
        num_videos: Maximum number of timestamps to collect.

    Returns:
        A list of ``datetime`` objects parsed from ``snippet.publishedAt``,
        sorted newest first. The list is empty when the channel lookup fails
        or returns no items, and may hold fewer than ``num_videos`` entries
        when the playlist is shorter or a page request fails (the error is
        printed and whatever was collected so far is returned).
    """
    video_dates = []
    try:
        res = youtube.channels().list(id=channel_id, part='contentDetails').execute()
        items = res.get('items')
        if not items:
            return video_dates
        playlist_id = items[0]['contentDetails']['relatedPlaylists']['uploads']
    except Exception as e:
        print(f"Błąd API w funkcji get_video_dates: {e}")
        return video_dates

    next_page_token = None
    while len(video_dates) < num_videos:
        request = youtube.playlistItems().list(
            playlistId=playlist_id,
            part="contentDetails,snippet",  # snippet carries the publishedAt timestamp read below
            maxResults=min(50, num_videos - len(video_dates)),
            pageToken=next_page_token,
        )
        # Guard page requests the same way as the channel lookup above so an
        # API error mid-pagination does not crash the caller.
        try:
            response = request.execute()
        except Exception as e:
            print(f"Błąd API w funkcji get_video_dates: {e}")
            break

        for item in response.get('items', []):
            published_at_str = item['snippet']['publishedAt']
            # Map a trailing "Z" to an explicit UTC offset that
            # datetime.fromisoformat accepts.
            published_at = datetime.datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
            video_dates.append(published_at)

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        time.sleep(0.1)  # brief pause between page requests

    return sorted(video_dates, reverse=True)  # newest to oldest


# --- MAIN LOGIC ---

def main():
    """Run the interactive upload-interval predictor.

    Loads the model and the subscriber-count scaler from ``model1/``, then
    repeatedly prompts for a channel handle. For each handle it fetches the
    channel ID, the subscriber count and the latest ``INPUT_WINDOW_SIZE + 1``
    upload timestamps, builds the feature vector (log1p of the last
    ``INPUT_WINDOW_SIZE`` intervals in days, newest first, followed by the
    scaled subscriber count) and prints the predicted interval.

    The loop ends when the user enters ``q`` or input reaches end-of-file.
    Errors raised while fetching channel data are printed and the prompt is
    shown again instead of terminating the program.
    """
    try:
        # NOTE: joblib.load unpickles the files; only load model files from a
        # trusted source.
        mlp = joblib.load('model1/mlp_model.joblib')
        scaler_subs = joblib.load('model1/scaler_subs.joblib')
    except FileNotFoundError:
        print("Błąd: Nie znaleziono plików modelu. Uruchom 'train.py' aby je utworzyć.")
        return

    youtube = build("youtube", "v3", developerKey=API_KEY)
    seconds_per_day = 60 * 60 * 24
    required_videos = INPUT_WINDOW_SIZE + 1

    while True:
        try:
            channel_handle = input("\nPodaj nazwę kanału (np. @PewDiePie) lub 'q' aby zakończyć: ").strip()
        except EOFError:
            # Treat closed stdin like an explicit quit.
            break
        if channel_handle.lower() == 'q':
            break
        if not channel_handle:
            # Nothing to look up; avoid sending an empty handle to the API.
            continue

        print(f"Pobieranie danych dla kanału {channel_handle}...")

        # A failed request (bad key, quota, network) must not end the session.
        try:
            channel_id = get_channel_id(youtube, channel_handle)
            if not channel_id:
                print("Nie udało się znaleźć kanału. Spróbuj innej nazwy.")
                continue

            subscriber_count = get_channel_info(youtube, channel_id)
            video_dates = get_video_dates(youtube, channel_id, required_videos)
        except Exception as e:
            print(f"Błąd API: {e}")
            continue

        if len(video_dates) < required_videos:
            print(
                f"Błąd: Kanał {channel_handle} ma za mało filmów ({len(video_dates)}) do przewidywania. Potrzeba co najmniej {INPUT_WINDOW_SIZE + 1}.")
            continue

        # Gaps in days between consecutive uploads, newest pair first.
        recent = video_dates[:required_videos]
        intervals = [
            (newer - older).total_seconds() / seconds_per_day
            for newer, older in zip(recent, recent[1:])
        ]

        # Warn when any gap falls outside the expected range (over 60 days).
        if max(intervals) > 60:
            print("Uwaga: Ostatnie filmy zawierają duże przerwy. Przewidywanie może być mniej dokładne.")

        # Normalisation: log1p for the intervals, fitted scaler for subscribers.
        normalized_intervals = [np.log1p(i) for i in intervals]
        normalized_subs = scaler_subs.transform([[subscriber_count]])[0][0]
        features = normalized_intervals + [normalized_subs]

        # Predict in log space and invert the log1p transform. expm1 of a
        # negative output would give a negative wait time, so clamp at zero.
        predicted_log_interval = mlp.predict([features])
        predicted_interval_days = max(0.0, float(np.expm1(predicted_log_interval)[0]))

        # Report the result.
        # NOTE(review): the features are gaps between uploads, so the predicted
        # value is presumably measured from the latest upload rather than from
        # now — confirm against the training script before rewording.
        print(f"\n--- Przewidywanie dla {channel_handle} ---")
        print(f"Ostatnie {INPUT_WINDOW_SIZE} odstępów: {[round(i, 2) for i in intervals]} dni")
        print(f"Liczba subskrybentów: {subscriber_count:,}")
        print(f"Model przewiduje, że następny film pojawi się za {predicted_interval_days:.2f} dni.")


# Start the interactive predictor only when this file is run as a script.
if __name__ == "__main__":
    main()
Downloads last month
-
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support