[UPD ITA] Filmscoop.it rel. 2.1

If you have written a script, you can share it with others here or ask for help improving it. You can also report bugs and problems with existing scripts here.
Post Reply
fulvio53s03
Posts: 753
Joined: 2007-04-28 05:46:43
Location: Italy

[UPD ITA] Filmscoop.it rel. 2.1

Post by fulvio53s03 »

Estrae tutti i commenti, codice ottimizzato.
Extracts all comments, optimized code.

Code: Select all

(***************************************************

Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/

[Infos]
Authors=Fulvio53s03 from original Claudio Rinaldi (rinaldiclaudio@gmail.com) & Mrobama
Title=FilmScoop.it
Description=Get movie info from FilmScoop.it
Site=www.filmscoop.it
Language=IT
Version=2.1
Requires=4.2.2
Comments=Nuova versione di Fulvio53s03, estrae tutti i commenti. Esecuzione velocizzata!
License=
GetInfo=1
RequiresMovies=1

[Options]

[Parameters]

***************************************************)

program FilmScoopIT;

uses
  StringUtils7552;
const
  debug = false;                                          // when true, intermediate pages are dumped to files under "folder"
  folder = 'f:\prova\';                                   // directory for the debug dump files (only used when debug is true)
  UrlBase = 'http://www.filmscoop.it';                    // site root, prefix of every URL built below

  UrlRicerca = '&to=&r=&i=&n=&g=0&a=&o=Titolo&Submit=Cerca';   // fixed query-string tail appended after the title in the search URL
  QueryBase = UrlBase + '/ricerca/risultati.asp?t=';      // search URL prefix; the title is appended to it
  QueryFilm = UrlBase + '/film_al_cinema/';               // movie page URL prefix; the film id is appended to it
  ImagePath = UrlBase + '/locandine/';                    // poster URL prefix; the image file name is appended to it

  cStartNumRis = 'Numero di risultati: <strong>'; // Result Number start Marker
  cEndNumRis = '</strong> - Pagin';               // Result Number end Marker
  cStartId = 'href="/film_al_cinema/';      // ID start marker
  cEndId = '"';                             // ID end marker
  cStartTitle = '<h1 class="TitoloFilmUpper"'; // Page title start marker (stored in the translated-title field)
  cEndTitle = '</a>';                          // Page title end marker
  cStartTranslTitle = 'Titolo Originale</strong>: ';        // Original title start marker (stored in the original-title field)
  cEndTranslTitle = '</h2>';                                // Original title end marker
  cStartImg = '<img src="http://www.filmscoop.it/locandine/';               // Image start marker
  cEndImg = '" alt';                                                        // Image end marker
  cStartDirector = 'Regia</strong>:';               // Director start marker
  cStartCast = 'Interpreti</strong>:';              // Actor start marker (also used as the director end marker)
  cStartCategory = 'Genere</strong>:';       // Category start marker
  cEndCategory = '</a>';                    // Category end marker
  cStartDuration = 'Durata</strong>:';       // Duration start marker (also used as the actor end marker)
  cEndDuration = '<br />';                    // Duration end marker
  cStartCountry = '<strong>Nazionalit&agrave;</strong>:';     // Country start Marker (the same span is also parsed for the year)
  cEndCountry = '<strong>Genere';                             // Country end marker
  cstartrecensione = '<div id="TestoRecensione">';            // Review text start marker (review page)
  cendrecensione   = '</div>';                                // Review text end marker
  cStartYear = ' ';                     // Year start marker
  cEndYear = '<br />';                 // Year end marker
  cStartDesc = '<h2 style="margin:25px 0 0 0;padding:0;font-weight:bold;">Trama del film';         // Description start marker
  cEndDesc = '</p>';                                                                               // Description end marker
  cStartComm = '<div align="center" class="comtext" style="margin-top:8px;">Commenti:';            // Comments start marker
  cMidComm  = '<div class="divCommentoLeft"';                                                      // Opening of a single user comment; appended as terminator of the comments block
  cEndComm = '<script type="text/javascript">';                                                    // Comments end marker
  cStartTitleList = '<strong>';            // Title list start marker
  cEndTitleList = '</strong>';            // Title list end marker
var
  MovieUrl, MovieName, TranslatedStr, PageStr:  string;
  cValue, extrvalue, AllComments: string;
  link_recensione, Allrecensione: string;
// -----------------------
// ANALYZE MOVIE DATA PAGE
// IN:  none
// OUT: set Ant fields
// -----------------------
// Downloads the movie page at MovieUrl and fills the Ant Movie Catalog
// fields: translated/original title, picture, director, actors, country,
// year, category, comments, description (plus review, if any), URL, length.
//
// Reads global:   MovieUrl
// Writes globals: PageStr, cValue, AllComments, Allrecensione,
//                 link_recensione, extrvalue (through formatta_commenti)
//
// NOTE(review): estrai_recensione and formatta_commenti are declared further
// down in this file; confirm that the script engine accepts calls to
// procedures declared later, otherwise forward declarations are required.
procedure AnalyzeMoviePage;
var
  start_link_recensione, cImage, SaveValue: string;
  ore, minuti, totale: integer;
begin
  // Fetch the page and keep only the part preceding the "BoxLeft" div.
  Allrecensione := '';
  PageStr := GetPage(MovieUrl);
  PageStr := UTF8decode(PageStr);
  // Truncate only when the marker is really there, so that a layout change
  // cannot leave us with an empty page (assumes textbefore may return ''
  // when the marker is missing - TODO confirm against StringUtils7552).
  if pos('<div class="BoxLeft">', PageStr) > 0 then
    PageStr := textbefore(PageStr, '<div class="BoxLeft">', '');                //2018.05.29
  if debug then
    DumpPage(folder + 'filmscoop_movie_page.html', PageStr);

  // REVIEW: if the page links to a review, download it into Allrecensione.
  start_link_recensione := '<a href="/cgi-bin/recensioni/';
  if pos(start_link_recensione, PageStr) > 0 then
  begin
    // Rebuild the anchor up to the closing quote, then keep what follows the
    // first quote, i.e. the relative path of the review page.
    link_recensione := start_link_recensione + textbetween(PageStr, start_link_recensione, '"');
    link_recensione := textafter(link_recensione, '"');
    link_recensione := UrlBase + link_recensione;
    estrai_recensione;
  end;

  // TRANSLATED TITLE (the start marker is an unclosed tag: it is put back so
  // that HTMLRemoveTags sees a complete tag and strips it)
  cValue := cStartTitle + textbetween(PageStr, cStartTitle, cEndTitle) + cEndTitle;
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldTranslatedTitle, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // FILM IMAGE
  cImage := textbetween(PageStr, cStartImg, cEndImg);
  if cImage <> '' then
    GetPicture(ImagePath + cImage);

  // ORIGINAL TITLE
  cValue := textbetween(PageStr, cStartTranslTitle, cEndTranslTitle);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  SetField(fieldOriginalTitle, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // DIRECTOR (ends where the cast section starts)
  cValue := textbetween(PageStr, cStartDirector, cStartCast);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldDirector, cValue);

  // ACTORS (ends where the duration section starts)
  cValue := textbetween(PageStr, cStartCast, cStartDuration);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldActors, cValue);

  // COUNTRY and YEAR are parsed from the same span, extracted only once.
  // Presumably it reads "<country> <year>" - verify against a live page.
  SaveValue := textbetween(PageStr, cStartCountry, cEndCountry);
  HTMLRemoveTags(SaveValue);
  HTMLDecode(SaveValue);
  SaveValue := fulltrim(SaveValue);

  // COUNTRY: text preceding the year (a number starting with 2 or with 1)
  cValue := textbefore(SaveValue, ' 2', '');
  if cValue = '' then
    cValue := textbefore(SaveValue, ' 1', '');
  SetField(fieldCountry, cValue);

  // YEAR: text following ' 2' or ' 1', with the leading digit put back.
  // The field is written only when a year was actually found; previously a
  // missing year was stored as the bogus value '1'.
  cValue := textafter(SaveValue, ' 2');
  if cValue <> '' then
    cValue := '2' + cValue
  else
  begin
    cValue := textafter(SaveValue, ' 1');
    if cValue <> '' then
      cValue := '1' + cValue;
  end;
  if cValue <> '' then
    SetField(fieldYear, cValue);

  // CATEGORY
  cValue := textbetween(PageStr, cStartCategory, cEndCategory);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  SetField(fieldCategory, AnsiUpFirstLetter(AnsiLowerCase(cValue)));

  // COMMENTS: cMidComm is appended so that the last comment is also enclosed
  // between two comment markers, as formatta_commenti expects.
  AllComments := cStartComm + textbetween(PageStr, cStartComm, cEndComm) + cMidComm + cEndComm;
  if debug then
    DumpPage(folder + 'filmscoop_comments.html', AllComments);
  formatta_commenti;
  extrvalue := fulltrim(extrvalue);
  if length(extrvalue) > 4 then                 // more than the leading '--- ' that prefixes each comment
    SetField(fieldComments, extrvalue);

  // DESCRIPTION: first paragraph after the plot heading, followed by the
  // review text when one was downloaded.
  cValue := cStartDesc + textbetween(PageStr, cStartDesc, cEndDesc) + cEndDesc;
  cValue := '<p' + textbetween(cValue, '<p', '</p>') + '</p>';
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  cValue := fulltrim(cValue);
  if Allrecensione <> '' then
    cValue := cValue + CRLF + CRLF + 'Recensione' + CRLF + Allrecensione;
  SetField(fieldDescription, cValue);

  // URL
  SetField(fieldURL, MovieUrl);

  // LENGTH: the text following 'h ' is read as "<hours>.<minutes>" and
  // converted to minutes. Hours are always multiplied by 60 (previously
  // "2.00" was stored as 2 minutes), and nothing is written when no duration
  // could be parsed (previously '0' was stored).
  cValue := textbetween(PageStr, '<strong>Durata</strong>:', '<br />');
  cValue := fulltrim(textafter(cValue, 'h '));
  ore := StrToInt(textbefore(cValue, '.', ''), 0);
  minuti := StrToInt(textafter(cValue, '.'), 0);
  totale := ore * 60 + minuti;
  if totale > 0 then
    SetField(fieldLength, IntToStr(totale));
end;

// Downloads the review page at link_recensione and leaves its plain text
// (the content between cstartrecensione and cendrecensione, without tags,
// HTML-decoded and trimmed) in the global Allrecensione.
// The global cValue is used as work area and ends up holding the same text.
procedure estrai_recensione;
begin
  Allrecensione := UTF8decode(GetPage(link_recensione));
  if debug then
    DumpPage(folder + 'filmscoop_recensione.html', Allrecensione);

  // Isolate the review container, then reduce it to plain text.
  cValue := textbetween(Allrecensione, cstartrecensione, cendrecensione);
  HTMLRemoveTags(cValue);
  HTMLDecode(cValue);
  Allrecensione := fulltrim(cValue);
  cValue := Allrecensione;
end;

// Splits the global AllComments into single user comments and appends each
// one, as plain text prefixed by '--- ', to the global extrvalue.
// A comment is the text found between two consecutive comment markers; on
// every pass the current comment is removed from AllComments and the next
// one is looked up. The loop ends when no text is left between two markers.
// The global cValue is used as work area.
procedure formatta_commenti;
var
  sep, spoilOpen, spoilClose, idAttr: string;
  pass: integer;
begin
  sep        := '<div class="divCommentoLeft"';   // opening of every comment
  spoilOpen  := '<p class="spoiler">';
  spoilClose := '</p>';
  idAttr     := 'id="divCommentoLeft';

  pass := 1;
  cValue := textbetween(AllComments, sep, sep) + sep;
  if debug then
    DumpPage(folder + 'filmscoop_opinione ' + IntToStr(pass) + '.html', cValue);

  repeat
    pass := pass + 1;

    // Drop the current comment from the pending text, then cut away the
    // replies section that follows the comment body.
    AllComments := '<div>' + stringReplace(AllComments, cValue, '');
    cValue := textbefore(cValue, '<div class="risposte">', '');
    if debug then
      DumpPage(folder + 'filmscoop_opinione ' + IntToStr(pass) + '.html', cValue);

    // Remove the spoiler paragraph and its label.
    cValue := stringReplace(cValue, spoilOpen + textbetween(cValue, spoilOpen, spoilClose) + spoilClose, '');
    cValue := stringReplace(cValue, '<strong>SPOILER</strong>', '');

    // Separate the vote from the text that follows it.
    cValue := stringReplace(cValue, ' / 10', ' / 10. ---  ');

    // Remove the remainder of the opening div (id attribute up to '>').
    cValue := stringReplace(cValue, idAttr + textbetween(cValue, idAttr, '>') + '>', '');

    // Reduce to plain text and append to the result.
    HTMLRemoveTags(cValue);
    HTMLDecode(cValue);
    cValue := fulltrim(cValue);
    extrvalue := extrvalue + CRLF + CRLF + '--- ' + cValue;

    // Look up the next comment.
    cValue := textbetween(AllComments, sep, sep) + sep;
  until cValue = sep;
end;


// ------------------------------------------------------------------
// FILL PICKTREE CONTROL WITH LINKS & TITLES or RETURN ONE PAGE LINK
// if OneFilm flag true return Film Id else populate PickTree
// IN:  OneFilm flag (bool)
// OUT: one page ID  (string)
// ------------------------------------------------------------------
// Reads the search result page held in the global PageStr.
// OneFilm = true : returns the URL of the first movie link found.
// OneFilm = false: fills the pick tree with one entry (title, URL) per movie
//                  link and returns ''. PageStr is consumed while scanning.
function PopulatePickTree(OneFilm: boolean): string;
var
  filmId, filmTitle, link, prevLink: string;
  linkPos: integer;
begin
  if OneFilm then
  begin
    filmId := textbetween(PageStr, cStartId, cEndId);
    result := stringreplace(QueryFilm + filmId, '" rel="nofollow', '');         //2018.05.25
    exit;
  end;

  PickTreeClear;
  linkPos := pos(cStartId, PageStr);
  while linkPos > 0 do
  begin
    // Move the next link to the very beginning of the text.
    Delete(PageStr, 1, linkPos - 1);

    filmId := textbetween(PageStr, cStartId, cEndId);                           // Get ID
    HTMLRemoveTags(filmId);
    link := QueryFilm + filmId;                                                 //2018.05.25

    filmTitle := textbetween(PageStr, cStartTitleList, cEndTitleList);          // Get Title
    HTMLRemoveTags(filmTitle);
    HTMLDecode(filmTitle);

    // Skip a link equal to the previous one and links to the trailer anchor.
    if (pos('#trailer', link) = 0) and (link <> prevLink) then
      PickTreeAdd(filmTitle, link);
    prevLink := link;

    // Step past the link just handled so that the next search moves on.
    Delete(PageStr, 1, pos(cStartId, PageStr));
    linkPos := pos(cStartId, PageStr);
  end;
  result := '';
end;

// ---------------------------------
// ANALYZE FIRST SEARCH RESULT PAGE:
// IN:  page Url (string)
// OUT: none
// ---------------------------------
// Downloads the search result page and imports the selected movie.
// IN:  Url - complete search URL
// No result : shows a message and returns.
// One result: that movie is imported directly.
// Otherwise : the user picks a movie from the list; returns if cancelled.
// Writes the globals PageStr and MovieUrl, then calls AnalyzeMoviePage.
procedure AnalyzeSearchPage(Url: string);
var
  NumRisultati: string;
begin
  PageStr := GetPage(Url);
  PageStr := UTF8decode(PageStr);
  if debug then
    DumpPage(folder + 'filmscoop_ricerca.txt', PageStr);

  // Number of results as shown on the page; empty when the marker is absent.
  NumRisultati := textbetween(PageStr, cStartNumRis, cEndNumRis);

  if (NumRisultati = '0') or (NumRisultati = '') then
  begin
    ShowMessage('Title not found / Nessun film trovato.');
    exit;
  end;                        // the statement separator was missing here

  if NumRisultati = '1' then
    MovieUrl := PopulatePickTree(true)
  else
  begin
    PopulatePickTree(false);
    if not PickTreeExec(MovieUrl) then      // let the user select one
      exit;
  end;

  AnalyzeMoviePage;
end;

// ----------
// MAIN:
// IN:  none
// OUT: none
// ----------
// Entry point: asks for the title to look up (defaulting to the translated
// title, or to the original title when the former is empty), builds the
// search URL and starts the import.
begin
  if CheckVersion(4,2,2) then
  begin
    TranslatedStr := GetField(fieldTranslatedTitle);
    MovieName := GetField(fieldOriginalTitle);
    if TranslatedStr <> '' then
      MovieName := TranslatedStr;

    // The dialog caption named another site ('MyMovies.It'); this script
    // queries FilmScoop.it.
    if Input('FilmScoop.it', 'Enter the title of the movie', MovieName) then
    begin
      // Spaces become '+' in the query string; other characters are sent
      // as typed.
      MovieUrl := QueryBase + StringReplace(MovieName, ' ', '+') + UrlRicerca;
      AnalyzeSearchPage(MovieUrl);
    end;
  end
  else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 4.2.2)');
end.
antp
Site Admin
Posts: 9642
Joined: 2002-05-30 10:13:07
Location: Brussels
Contact:

Re: [UPD ITA] Filmscoop.it rel. 2.1

Post by antp »

Thanks
Post Reply