Script for www.caratulasdecine.com (Spanish)

If you have made a script, you can offer it to others here or ask for help improving it. You can also report bugs and problems with existing scripts here.
Post Reply
japg2000
Posts: 15
Joined: 2004-03-22 19:14:14

Script for www.caratulasdecine.com (Spanish)

Post by japg2000 »

I've made this script to get the Spanish cover pictures from www.caratulasdecine.com.

Code: Select all

// GETINFO SCRIPTING
// www.caratulasdecine.com - Large picture

(***************************************************
 *  Author: japg2000 (japg2000@terra.es)               *
 *  Script for the importation of pictures from    *
 *  caratulasdecine                                *
 *                                                 *
 *  For use with Ant Movie Catalog 3.4.0           *
 *  www.ant.be.tf/moviecatalog ··· www.buypin.com  *
 *                                                 *
 *  The source code of the script can be used in   *
 *  another program only if full credits to        *
 *  script author and a link to Ant Movie Catalog  *
 *  website are given in the About box or in       *
 *  the documentation of the program               *
 ***************************************************)

program CaratulasdecineImport;

var
  CaratulasdecinePage, MoviePage: TStringList;
  PageOfLetter: Array of string;
  Dir, Line, LineTmp, Title: string;
  C: Char;
  Encontrado, Salir, BeginPos, EndPos, Index, LineNr, LineMov: Integer;
  debug: Integer;

// Returns the index of the first line of List, at or after StartAt, that
// contains Pattern as a substring; returns -1 when no such line exists.
// A negative StartAt is treated as 0.
function FindLine(Pattern: string; List: TStringList; StartAt: Integer): Integer;
var
  Cursor: Integer;
begin
  Result := -1;
  if StartAt < 0 then
    Cursor := 0
  else
    Cursor := StartAt;
  // Stop as soon as a match has been recorded or the list is exhausted.
  while (Result < 0) and (Cursor < List.Count) do
  begin
    if Pos(Pattern, List.GetString(Cursor)) <> 0 then
      Result := Cursor;
    Cursor := Cursor + 1;
  end;
end;

// Strips every leading repetition of CR from S and returns the remainder.
// S is returned unchanged when it does not start with CR (or CR is empty).
function EliminaInicio(S: string; CR: string): string;
var
  Step, Skipped: Integer;
begin
  Step := Length(CR);
  Skipped := 0;
  // Count how many characters are covered by consecutive leading copies of CR.
  if Step > 0 then
    while Copy(S, Skipped + 1, Step) = CR do
      Skipped := Skipped + Step;
  Result := Copy(S, Skipped + 1, Length(S) - Skipped);
end;

// Extracts the text found between StartTag and EndTag in S.
//
// S        - (in/out) the text to scan. On success everything up to and
//            including EndTag is removed from S, so repeated calls walk
//            through the string. When a tag is missing S is left untouched.
// StartTag - marker that precedes the wanted text; an empty StartTag means
//            "start at the beginning of S".
// EndTag   - marker that follows the wanted text.
//
// Returns the text between the two markers, or '' when StartTag (if given)
// or EndTag cannot be found.
function CadenaEntre(var S: string; StartTag: string; EndTag: string): string;
var
  Resto: string;
  TagPos: Integer;
  StartOk: Boolean;
begin
  result := '';
  // Work on a copy so that S is only modified once both tags were located.
  Resto := S;
  StartOk := True;

  if StartTag <> '' then
  begin
    TagPos := Pos(StartTag, Resto);
    if TagPos > 0 then
      Delete(Resto, 1, TagPos + Length(StartTag) - 1)
    else
      StartOk := False;
  end;

  if StartOk then
  begin
    TagPos := Pos(EndTag, Resto);
    if TagPos > 0 then
    begin
      result := Copy(Resto, 1, TagPos - 1);
      // Remove exactly the extracted text plus the end tag, whatever its length.
      Delete(Resto, 1, TagPos + Length(EndTag) - 1);
      S := Resto;
    end;
  end;
end;

// Normalizes a title so that two spellings of the same title compare equal:
// lower-cases it, decodes HTML entities, maps apostrophe variants to '´',
// removes question/exclamation marks and strips the acute accent from
// lower-case vowels.
//
// T - the raw title.
// Returns the normalized title.
//
// NOTE(review): the original also contained a replacement whose search
// pattern was an empty string and one that replaced '´' with itself. Neither
// can change the text, so both were dropped. The empty pattern probably lost
// a non-printable character (possibly chr(145)) when the script was posted -
// confirm against the author's original file.
function PreparaTitulo(T: string): string;
begin
  result := AnsiLowerCase(T);
  HTMLDecode(result);

  // Apostrophe variants -> '´'
  result := StringReplace(result, chr(146), '´');
  result := StringReplace(result, chr(39), '´');
  result := StringReplace(result, '`', '´');

  // Punctuation that is ignored when comparing titles
  result := StringReplace(result, '¿', '');
  result := StringReplace(result, '?', '');
  result := StringReplace(result, '¡', '');
  result := StringReplace(result, '!', '');

  // Accented vowels -> plain vowels
  result := StringReplace(result, 'á', 'a');
  result := StringReplace(result, 'é', 'e');
  result := StringReplace(result, 'í', 'i');
  result := StringReplace(result, 'ó', 'o');
  result := StringReplace(result, 'ú', 'u');
end;

// Main program.
//
// 1. Picks the alphabetical index page of the site from the first character
//    of the normalized translated title.
// 2. Walks the entries of that index page (an entry may span several source
//    lines and ends at '<br>'; '</td>' ends the list).
// 3. For the first entry whose normalized text contains the title, downloads
//    the movie page and imports the cover picture found on it.
begin
  // PageOfLetter[0..9] -> digits, PageOfLetter[10..35] -> letters 'a'..'z'.
  Setarraylength(PageOfLetter,36);
  PageOfLetter[0]:='0_9';  PageOfLetter[1]:='0_9';
  PageOfLetter[2]:='0_9';  PageOfLetter[3]:='0_9';
  PageOfLetter[4]:='0_9';  PageOfLetter[5]:='0_9';
  PageOfLetter[6]:='0_9';  PageOfLetter[7]:='0_9';
  PageOfLetter[8]:='0_9';  PageOfLetter[9]:='0_9';
  PageOfLetter[10]:='a';   PageOfLetter[11]:='b';
  PageOfLetter[12]:='c';   PageOfLetter[13]:='d';
  PageOfLetter[14]:='e';   PageOfLetter[15]:='f_h';
  PageOfLetter[16]:='f_h'; PageOfLetter[17]:='f_h';
  PageOfLetter[18]:='i_k'; PageOfLetter[19]:='i_k';
  PageOfLetter[20]:='i_k'; PageOfLetter[21]:='l';
  PageOfLetter[22]:='m_n'; PageOfLetter[23]:='m_n';
  PageOfLetter[24]:='o_q'; PageOfLetter[25]:='o_q';
  PageOfLetter[26]:='o_q'; PageOfLetter[27]:='r_s';
  PageOfLetter[28]:='r_s'; PageOfLetter[29]:='t_z';
  PageOfLetter[30]:='t_z'; PageOfLetter[31]:='t_z';
  PageOfLetter[32]:='t_z'; PageOfLetter[33]:='t_z';
  PageOfLetter[34]:='t_z'; PageOfLetter[35]:='t_z';

  Title := PreparaTitulo(GetField(fieldTranslatedTitle));

  // Index stays -1 unless the title starts with a digit or a letter a-z;
  // anything else would index PageOfLetter out of range.
  Index := -1;
  if Length(Title) > 0 then
  begin
    C := copy(Title, 1, 1);
    if (C >= '0') and (C <= '9') then
      Index := Ord(C) - Ord('0')
    else if (C >= 'a') and (C <= 'z') then
      Index := 10 + Ord(C) - Ord('a');
  end;

  if Index < 0 then
    ShowMessage('El titulo "' + Title + '" no empieza por una letra (a-z) ni por un digito; no se puede elegir la pagina de indice.')
  else
  begin
    CaratulasdecinePage := TStringList.Create;
    CaratulasdecinePage.Text := GetPage('http://www.caratulasdecine.com/' + PageOfLetter[Index] + '.htm');

    //CaratulasdecinePage.SaveToFile('z:\caratulas.txt');
    //CaratulasdecinePage.LoadFromFile('z:\caratulas.txt');

    Salir := 0;
    Encontrado := 0;

    // The entries start on the line after this marker. If the marker is
    // missing FindLine returns -1 and scanning starts at line 0.
    LineNr := FindLine('</big></big></big></big></big></big>', CaratulasdecinePage, 0);
    LineNr := LineNr + 1;
    if LineNr < CaratulasdecinePage.Count then
      Line := EliminaInicio(CaratulasdecinePage.GetString(LineNr), ' ')
    else
      Salir := 1;

    while Salir = 0 do
    begin
      // Join source lines until the entry is complete ('<br>'), the list
      // ends ('</td>') or the page has no more lines.
      while (Pos('<br>', Line) = 0) and (Pos('</td>', Line) = 0) and (LineNr + 1 < CaratulasdecinePage.Count) do
      begin
        if copy(Line, Length(Line), 1) <> ' ' then
          Line := Line + ' ';
        LineNr := LineNr + 1;
        LineTmp := CaratulasdecinePage.GetString(LineNr);
        LineTmp := EliminaInicio(LineTmp, ' ');
        Line := Line + LineTmp;
      end;

      // '</td>' closes the list: this entry is the last one examined.
      if Pos('</td>', Line) > 0 then
        Salir := 1;

      if Pos('<a class="A" href="', Line) > 0 then
      begin
        // CadenaEntre also consumes the tag, leaving the link text in Line.
        Dir := 'http://www.caratulasdecine.com/' + CadenaEntre(Line, '<a class="A" href="', '">');
      end
      else
      begin
        // Alternative markup: href first, class afterwards. Keep extracting
        // while the result still contains another '<a href="'.
        Dir := Line;
        Dir := CadenaEntre(Dir, '<a href="', 'class="A">');
        while Pos('<a href="', Dir) > 0 do
          Dir := CadenaEntre(Dir, '<a href="', 'class="A">');
      end;
      //showmessage(Dir);

      // Text up to the next tag, normalized like Title.
      Line := CadenaEntre(Line, '', '<');
      Line := PreparaTitulo(Line);
      // ShowMessage(Title + ' ¿=? ' + Line + ' -> ' + Dir);

      if Pos(Title, Line) > 0 then
      begin
        MoviePage := TStringList.Create;
        MoviePage.Text := GetPage(Dir);
        //MoviePage.SaveToFile('z:\caratulasmovie.txt');
        LineMov := FindLine('<p align="center"><img src="', MoviePage, 0);
        // Only import when the picture tag exists; otherwise the final
        // "no encontrado" message is shown.
        if LineMov >= 0 then
        begin
          Line := MoviePage.GetString(LineMov);
          Line := CadenaEntre(Line, '<p align="center"><img src="', '" ');
          Line := EliminaInicio(Line, '../');
          GetPicture('http://www.caratulasdecine.com/' + Line, True);
          Encontrado := 1;
        end;
        MoviePage.Free;
        Salir := 1;
      end;

      // Advance to the next entry, stopping at the real end of the page
      // instead of a fixed line limit.
      LineNr := LineNr + 1;
      if LineNr >= CaratulasdecinePage.Count then
        Salir := 1
      else
        Line := EliminaInicio(CaratulasdecinePage.GetString(LineNr), ' ');
    end;

    CaratulasdecinePage.Free;

    if (Encontrado <> 1) then
      ShowMessage('Titulo ' + Title + ' no encontrado en ' + 'http://www.caratulasdecine.com/' + PageOfLetter[Index] + '.htm');
  end;

end.
Greetings.
micmic
Posts: 24
Joined: 2004-03-21 13:36:45
Location: Mi casa
Contact:

Re: Script for www.caratulasdecine.com (Spanish)

Post by micmic »

japg2000 wrote:

Code: Select all

  Title := PreparaTitulo(GetField(fieldTranslatedTitle));
What about this?:

Code: Select all

Title := PreparaTitulo(GetField(fieldTranslatedTitle));
Input('Import from caratulasdecine.com', 'Please, the title is:', Title);
Title := PreparaTitulo(Title);
Later:
japg2000 wrote:

Code: Select all

  if (Encontrado <> 1) then
     ShowMessage('Titulo ' + Title + ' no encontrado en ' + 'http://www.caratulasdecine.com/' + PageOfLetter[Index] + '.htm');
I add this:

Code: Select all

  else DisplayResults;
Thanks for your script; it is very interesting.
japg2000
Posts: 15
Joined: 2004-03-22 19:14:14

Post by japg2000 »

Good advice, thanks :)

Greetings.
micmic
Posts: 24
Joined: 2004-03-21 13:36:45
Location: Mi casa
Contact:

Post by micmic »

I'm thinking about a script for LaButaca.com; its index pages are very similar. Thanks
micmic
Posts: 24
Joined: 2004-03-21 13:36:45
Location: Mi casa
Contact:

Post by micmic »

Hey, I made a script using Google for you. The advantage is that you search for the exact title, so if your title is "007" you can choose which one you prefer.

Code: Select all

// GETINFO SCRIPTING
// CaratulasDeCine+Google v1.0 by japg2000 & micmic

(***************************************************
 *  Author: japg2000 (japg2000@terra.es)           *
 *          micmic   (micmic@dieznet.com)          *
 *  Script for the importation of pictures from    *
 *  caratulasdecine                                *
 *                                                 *
 *  For use with Ant Movie Catalog 3.4.0           *
 *  www.ant.be.tf/moviecatalog ··· www.buypin.com  *
 *                                                 *
 *  The source code of the script can be used in   *
 *  another program only if full credits to        *
 *  script author and a link to Ant Movie Catalog  *
 *  website are given in the About box or in       *
 *  the documentation of the program               *
 ***************************************************)


program CaratulasdecineImport;
var
  MovieName: string;
const
  Dominio = 'www.caratulasdecine.com';
  BaseURL1 = 'http://www.google.com/custom?hl=es&ie=ISO-8859-1&cof=&domains=';
  BaseURL2 = '&q=';
  BaseURL3 = '&btnG=B%FAsqueda+en+Google&sitesearch=';

// Searches List from StartAt (clamped to 0) onwards and returns the index of
// the first line containing Pattern, or -1 if none does.
function FindLine(Pattern: string; List: TStringList; StartAt: Integer): Integer;
var
  First, Row: Integer;
begin
  Result := -1;
  First := StartAt;
  if First < 0 then
    First := 0;
  for Row := First to List.Count - 1 do
  begin
    if Pos(Pattern, List.GetString(Row)) > 0 then
    begin
      // First hit wins; leave immediately.
      Result := Row;
      Exit;
    end;
  end;
end;

// Returns S with all leading occurrences of CR removed. When S does not
// begin with CR, or CR is empty, S is returned as is.
function EliminaInicio(S: string; CR: string): string;
var
  PrefixLen: Integer;
begin
  result := S;
  PrefixLen := Length(CR);
  if PrefixLen > 0 then
    // Drop one copy of the prefix per iteration while it is still present.
    while Copy(result, 1, PrefixLen) = CR do
      result := Copy(result, PrefixLen + 1, Length(result) - PrefixLen);
end;

// Extracts the text found between StartTag and EndTag in S.
//
// S        - (in/out) the text to scan. On success everything up to and
//            including EndTag is removed from S. When a tag is missing S is
//            left untouched.
// StartTag - marker that precedes the wanted text; an empty StartTag means
//            "start at the beginning of S".
// EndTag   - marker that follows the wanted text.
//
// Returns the text between the two markers, or '' when StartTag (if given)
// or EndTag cannot be found.
function CadenaEntre(var S: string; StartTag: string; EndTag: string): string;
var
  Resto: string;
  TagPos: Integer;
  StartOk: Boolean;
begin
  result := '';
  // Work on a copy so that S is only modified once both tags were located.
  Resto := S;
  StartOk := True;

  if StartTag <> '' then
  begin
    TagPos := Pos(StartTag, Resto);
    if TagPos > 0 then
      Delete(Resto, 1, TagPos + Length(StartTag) - 1)
    else
      StartOk := False;
  end;

  if StartOk then
  begin
    TagPos := Pos(EndTag, Resto);
    if TagPos > 0 then
    begin
      result := Copy(Resto, 1, TagPos - 1);
      // Remove exactly the extracted text plus the end tag, whatever its length.
      Delete(Resto, 1, TagPos + Length(EndTag) - 1);
      S := Resto;
    end;
  end;
end;

// Downloads the Google result page at Address, lists every result link in
// the pick tree and, if the user chooses one, imports that movie page.
//
// Address - URL of the Google custom-search result page. It is also used as
//           the variable that receives the URL picked by the user.
//
// NOTE(review): AnalyzeMoviePage is declared after this procedure in the
// file; if the script engine rejects the call below, a forward declaration
// is needed - confirm against the Ant Movie Catalog version in use.
procedure AnalyzePage(Address: string);
var
  Page: TStringList;
  LineNr: Integer;
  PosIni, PosFin: Integer;
  SubLine: string;
  Title, DirURL: string;
  txtTemp: string;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  if Pos('No se encontró ninguna página', Page.Text) > 0 then
  begin
    // Release the page on this branch as well.
    Page.Free;
    ShowMessage('No se ha encontrado ningún artículo por título.');
  end else
  begin
    PickTreeClear;
    PickTreeAdd('Resultados de la búsqueda para "' + MovieName + '" (' + Dominio + ') por Google:', '');

    // Put every result on a line of its own so it can be parsed line by line.
    Page.Text := StringReplace(Page.Text, '<br>', #13#10);
    Page.Text := StringReplace(Page.Text, '<p class=g>', #13#10 + '<p class=g>');

    // Look for the result entries.
    LineNr := 0;
    while LineNr < Page.Count do
    begin
      SubLine := Page.GetString(LineNr);
      txtTemp := '<p class=g><a href=';
      PosIni := Pos(txtTemp, SubLine);
      if PosIni > 0 then
      begin
        // Everything after 'href=' up to the closing '>' is the URL.
        SubLine := Copy(SubLine, PosIni + Length(txtTemp), Length(SubLine));
        PosFin := Pos('>', SubLine);
        if PosFin > 0 then
        begin
          DirURL := Copy(SubLine, 1, PosFin - 1);
          DirURL := StringReplace(DirURL, '"', '');

          // The link text runs up to '</a>'.
          SubLine := Copy(SubLine, PosFin + 1, Length(SubLine));
          PosFin := Pos('</a>', SubLine);
          if PosFin > 0 then
          begin
            Title := Copy(SubLine, 1, PosFin - 1);
            HTMLRemoveTags(Title);

            //ShowMessage(Title + '-->' + DirURL);
            // Skip the two result titles reported in the forum thread as
            // not being movie entries.
            if (Title <> 'Actualidad') and (Title <> 'Mercadillo de cine') then
              PickTreeAdd(Title, DirURL);
          end;
        end;
      end;
      LineNr := LineNr + 1;
    end;

    Page.Free;
    if PickTreeExec(Address) then
      AnalyzeMoviePage(Address);
  end;
end;

// Downloads the movie page at Address, stores its <title> text as the
// translated title and imports the cover picture referenced on the page.
//
// Address - URL of the movie page on www.caratulasdecine.com.
procedure AnalyzeMoviePage(Address: string);
var
  MoviePage: TStringList;
  LineNr: Integer;
  Line: string;
begin
  MoviePage := TStringList.Create;
  MoviePage.Text := GetPage(Address);

  // Title: only overwrite the field when something was actually extracted.
  LineNr := FindLine('<title>', MoviePage, 0);
  if LineNr >= 0 then
  begin
    Line := MoviePage.GetString(LineNr);
    Line := CadenaEntre(Line, '<title>', '</title>');
    if Line <> '' then
      SetField(fieldTranslatedTitle, Line);
  end;

  // Cover picture: FindLine returns -1 when the tag is missing, which must
  // not be passed on to GetString.
  LineNr := FindLine('<p align="center"><img src="', MoviePage, 0);
  if LineNr >= 0 then
  begin
    Line := MoviePage.GetString(LineNr);
    Line := CadenaEntre(Line, '<p align="center"><img src="', '" ');
    // The image path is relative to the page; drop the leading '../' parts.
    Line := EliminaInicio(Line, '../');
    GetPicture('http://www.caratulasdecine.com/' + Line, True);
  end else
    ShowMessage('No se ha encontrado la caratula en ' + Address);

  MoviePage.Free;
  DisplayResults;
end;

// Main program: asks for the movie title (pre-filled with the original
// title, or the translated title when the original one is empty) and
// searches for it on the site through Google.
begin
  if CheckVersion(3,4,0) then
  begin
    MovieName := GetField(fieldOriginalTitle);
    if MovieName = '' then MovieName := GetField(fieldTranslatedTitle);

    // Do nothing when the dialog is cancelled or the title is left empty.
    if Input('Importar de ' + Dominio + ' (por Google)', 'Introduce el Titulo de la Pelicula:', MovieName) then
    begin
      if MovieName <> '' then
        AnalyzePage(BaseURL1 + Dominio + BaseURL2 + UrlEncode(MovieName) + BaseURL3 + Dominio);
    end;

  end else
       ShowMessage('Este script necesita una versión superior de Ant Movie Catalog (al menos la version 3.4.0)');
end.
iNoT
Posts: 11
Joined: 2004-03-17 10:15:09

Post by iNoT »

Oh, this script is very interesting, but doesn't this website have any info about the films?

and one question, why this scripts search in google? i understand it

I'm testing this script with the film "Dagon", and when the script searches, a line with "Actualidad" appears; I think it's a bug...


micmic you are king!
micmic
Posts: 24
Joined: 2004-03-21 13:36:45
Location: Mi casa
Contact:

Post by micmic »

This site doesn't have a "Find" command, and the films are listed in index pages (a.htm, b.htm, ...). With this scheme it isn't easy to find a film, and the title must also be complete. With Google you can search for a single word and, if you are a lucky man, you get what you want.

Ah! I only cut and paste; I don't know Delphi and my Pascal is obsolete. You can write your own scripts by looking at the HTML code and the "if" statements. (ES: "Me has sacado los colores") ;)

There are a lot of very good scripts in this forum.
japg2000
Posts: 15
Joined: 2004-03-22 19:14:14

Post by japg2000 »

Great idea micmic. And it works very well. ;)

About the line 'Actualidad' (and another one with 'Mercadillo de cine' that has appeared for me in some searches): I've solved it by changing the line

Code: Select all

          PickTreeAdd(Title, DirURL);
with this:

Code: Select all

        if ((Title <> 'Actualidad') and (Title <> 'Mercadillo de cine')) then
          PickTreeAdd(Title, DirURL);
Greetings.
japg2000
Posts: 15
Joined: 2004-03-22 19:14:14

Post by japg2000 »

Oh, and i'm waiting your script for LaButaca.com (if you have time to make it) :grinking:

Greetings
Gogeta

Post by Gogeta »

Como creo que todos hablamos español y a mi no se me da muy bien pues me dirijo a ustedes en español.

Mi idea sobre el script perfecto es aquel que no tenemos que seleccionar el nombre de la pelicula a no ser que haya dudas (Como el de yahoo) que añada una extensa informacion sobre la pelicula (culturalia por ejemplo) y que use caratulas grandes (como la butaca).
A ver si alguien me sorprende

:wink:
homeroarg

Post by homeroarg »

A script for La Butaca (www.labutaca.com) is exactly what we need !!!
This site is the Spanish IMDB (the plots are translated from IMDB)
Murnau_Vs_Buñuel
Posts: 58
Joined: 2004-11-18 12:48:40

Post by Murnau_Vs_Buñuel »

Estoy de acuerdo con Gogeta :grinking:
Por cierto a mi el script de caratulasdecine + google no me funciona, no se a vosotros.
Post Reply