Here is an operational pre-release of the Culturalia+IMDB (Batch) script. Use it with caution: make a backup first, or try it on a temporary movie database.
It works for most movies, but not for all of them. For example, for "El Arte de la Guerra" we get the info for "Kickboxer 3: el arte de la guerra"; for "Hombres de negro" we get the info for "Hombre de negro II".
I don't know why at the moment; it may be a Culturalia problem or a script problem. I must check it.
Code: Select all
// SCRIPTING
// Culturalia+IMDB (Batch)
(***************************************************
* Script merged by Jose Miguel Folgueira, based *
* on a similar script merged by Antoine Potten *
* *
* Movie importation script for: *
* IMDB (US), http://us.imdb.com *
* *
* (c) 2002 Antoine Potten antoine@buypin.com *
* Contributors : *
* Danny Falkov *
* Kai Blankenhorn *
* lboregard *
* Ork <ork@everydayangels.net> *
* Trekkie <Asimov@hotmail.com> *
* Youri Heijnen *
* *
* Movie importation script for: *
* Culturalia, http://www.culturalianet.com *
* *
* Original version made by David Arenillas *
* New version made by Antoine Potten *
* Contributors: *
* Jose Miguel Folgueira *
* RedDwarf *
* Hades666 *
* *
* Thanks to Culturalia's webmaster for his help *
* and for providing more direct access to his *
* database *
* *
* For use with Ant Movie Catalog 3.4.x *
* www.ant.be.tf/moviecatalog ··· www.buypin.com *
* *
* The source code of the script can be used in *
* another program only if full credits to *
* script author and a link to Ant Movie Catalog *
* website are given in the About box or in *
* the documentation of the program *
* *
***************************************************)
program Culturalia_IMDB_Batch;
var
// MovieName: title taken from the current movie (or typed by the user) and used as the Culturalia search text.
MovieName, Titulo: string;
// MovieURL: address of the IMDB page, written by the IMDB routines below.
MovieURL: string;
// Articles: Spanish leading articles, filled in by the main block.
Articles: array of string;
// Index: loop counter used by the main block when walking Articles.
Index: Integer;
const
// Culturalia catalogue script; the query string is appended by the callers.
BaseURLCulturalia = 'http://www.culturalianet.com/bus/catalogo.php';
DescriptionToImport = 2;
{
Meaning of the DescriptionToImport values:
2 = import longest
1 = import short (from main page, faster)
0 = display list to select a description
}
UseLongestDescIMDB = False; // If set to False, the shortest description available will be imported (faster, since it is taken from the main page)
// Set the following constants to True to import a field from IMDB, or to False to skip it.
// By default, only the fields that are not available at Culturalia are set to True.
ImportActors = False;
ImportCategory = False;
ImportComments = False;
ImportCountry = False;
ImportDescription = False;
ImportDirector = False;
ImportLength = True;
ImportLanguage = False;
ImportOriginalTitle = False;
ImportPicture = False;
ImportRating = True;
ImportURL = False;
ImportYear = False;
// Returns the index of the first line of List, at or after StartAt, that
// contains Pattern; returns -1 when no such line exists.
// A negative StartAt is treated as 0.
function FindLine(Pattern: string; List: TStringList; StartAt: Integer): Integer;
var
  Cursor, LastIndex: Integer;
begin
  Result := -1;
  LastIndex := List.Count - 1;
  if StartAt < 0 then
    Cursor := 0
  else
    Cursor := StartAt;
  // Walk forward until a hit is recorded or the list is exhausted.
  while (Result = -1) and (Cursor <= LastIndex) do
  begin
    if Pos(Pattern, List.GetString(Cursor)) > 0 then
      Result := Cursor
    else
      Cursor := Cursor + 1;
  end;
end;
// Helper for AnalyzePageIMDB: unless a result section was already used, looks
// for the section header Header in the search page and, when present, stores
// the first address returned by AddMoviesTitles in the global MovieURL.
procedure PickIMDBSection(Header: string; Page: TStringList; var TitleFound: Boolean);
var
  LineNr: Integer;
begin
  if not TitleFound then
  begin
    LineNr := FindLine(Header, Page, 0);
    if LineNr > -1 then
    begin
      MovieURL := AddMoviesTitles(Page, LineNr);
      TitleFound := True;
    end;
  end;
end;

// Downloads Address from IMDB.
// - If the page does not contain '<TITLE>IMDb' it is handed to
//   AnalyzeMoviePageIMDB as a movie page.
// - Otherwise it is treated as a search-result page: the first result section
//   present (in the order listed below) supplies MovieURL, and that address is
//   analysed recursively.
procedure AnalyzePageIMDB(Address: string);
var
  Page: TStringList;
  TitleFound: Boolean;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  if pos('<TITLE>IMDb', Page.Text) = 0 then
  begin
    AnalyzeMoviePageIMDB(Page);
  end else
  begin
    TitleFound := False;
    // Only the first section found is used, so every lookup starts at line 0.
    PickIMDBSection('<H2><A NAME="top">Most popular searches</A></H2>', Page, TitleFound);
    PickIMDBSection('<H2><A NAME="mov">Movies</A></H2>', Page, TitleFound);
    PickIMDBSection('<H2><A NAME="tvm">TV-Movies</A></H2>', Page, TitleFound);
    PickIMDBSection('<H2><A NAME="vid">Made for video</A></H2>', Page, TitleFound);
    PickIMDBSection('<H2><A NAME="tvs">TV series</A></H2>', Page, TitleFound);
    // Do not follow an empty address: that would fetch nothing and then
    // parse the empty result as if it were a movie page.
    if TitleFound then
      if MovieURL <> '' then
        AnalyzePageIMDB(MovieURL);
  end;
  Page.Free;
end;
// Extracts movie data from an already downloaded IMDB movie page and stores
// it in the current movie through SetField / GetPicture.
// Each field is only written when the matching Import* constant is True.
// Side effect: the global MovieURL is set to the address built from the page.
procedure AnalyzeMoviePageIMDB(Page: TStringList);
var
  Line, Value, Value2, FullValue: string;
  LineNr: Integer;
  BeginPos, EndPos: Integer;
begin
  // NOTE(review): the search pattern is 18 characters long, yet the copy starts
  // at offset +19 and takes 7 characters - verify against the real page markup
  // that this yields the intended title id (and trailing separator).
  MovieURL := 'http://imdb.com/title/tt' + copy(Page.Text, pos('<a href="/title/tt',Page.Text)+19, 7);
  // URL (honours ImportURL so that the URL imported from Culturalia is kept by default)
  if ImportURL then
    SetField(fieldURL, MovieURL);
  // Original Title & Year
  if (ImportOriginalTitle) or (ImportYear) then
  begin
    LineNr := FindLine('<title>', Page, 0);
    if LineNr > -1 then
    begin
      // Read the line only after the index is known to be valid.
      Line := Page.GetString(LineNr);
      BeginPos := pos('<title>', Line);
      if BeginPos > 0 then
        BeginPos := BeginPos + 7;
      EndPos := pos('(', Line);
      if EndPos = 0 then
        EndPos := Length(Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos - 1);
      HTMLDecode(Value);
      if ImportOriginalTitle then
        SetField(fieldOriginalTitle, Value);
      // The year is the text between '(' and '/I' or ')'; skip it when there is no '('.
      BeginPos := pos('(', Line);
      if BeginPos > 0 then
      begin
        BeginPos := BeginPos + 1;
        EndPos := Pos('/I', Line);
        if EndPos < BeginPos then
          EndPos := pos(')', Line);
        Value := copy(Line, BeginPos, EndPos - BeginPos);
        if ImportYear then
          SetField(fieldYear, Value);
      end;
    end;
  end;
  // Rating
  if ImportRating then
  begin
    LineNr := FindLine('User Rating:', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr + 4);
      if Pos('/10', Line) > 0 then
      begin
        BeginPos := pos('<b>', Line) + 3;
        // Integer digit plus first decimal digit, rounded to a whole number.
        Value := IntToStr(Round(StrToInt(StrGet(Line, BeginPos), 0) + (StrToInt(StrGet(Line, BeginPos + 2), 0) / 10)));
        SetField(fieldRating, Value);
      end;
    end;
  end;
  // Director
  if ImportDirector then
  begin
    LineNr := FindLine('Directed by', Page, 0);
    if LineNr > -1 then
    begin
      FullValue := '';
      Line := Page.GetString(LineNr + 1);
      // Collect the text of every link on the line, skipping '(more)'.
      repeat
        BeginPos := pos('">', Line) + 2;
        EndPos := pos('</a>', Line);
        Value := copy(Line, BeginPos, EndPos - BeginPos);
        if (Value <> '(more)') and (Value <> '') then
        begin
          if FullValue <> '' then
            FullValue := FullValue + ', ';
          FullValue := FullValue + Value;
        end;
        Delete(Line, 1, EndPos);
      until Pos('</a>', Line) = 0;
      HTMLDecode(FullValue);
      SetField(fieldDirector, FullValue);
    end;
  end;
  // Actors
  if ImportActors then
  begin
    LineNr := FindLine('ast overview', Page, 0);
    if LineNr = -1 then
      LineNr := FindLine('redited cast', Page, 0);
    if LineNr > -1 then
    begin
      FullValue := '';
      Line := Page.GetString(LineNr);
      // Consume one table row per iteration until no more cells are found.
      repeat
        BeginPos := Pos('<td valign="top">', Line);
        if BeginPos > 0 then
        begin
          Delete(Line, 1, BeginPos);
          Line := copy(Line, 25, Length(Line));
          BeginPos := pos('">', Line) + 2;
          EndPos := pos('</a>', Line);
          if EndPos = 0 then
            EndPos := Pos('</td>', Line);
          Value := copy(Line, BeginPos, EndPos - BeginPos);
          if (Value <> '(more)') and (Value <> '') then
          begin
            // Optional role name that follows the '....' separator cell.
            BeginPos := pos('.... </td><td valign="top">', Line);
            if BeginPos > 0 then
            begin
              EndPos := pos('</td></tr>', Line);
              BeginPos := BeginPos + 27;
              Value2 := copy(Line, BeginPos, EndPos - BeginPos);
              if Value2 <> '' then
              begin
                Value := Value + ' (as ' + Value2 + ')';
              end;
            end;
            if FullValue <> '' then
              FullValue := FullValue + ', ';
            FullValue := FullValue + Value;
          end;
          EndPos := Pos('</td></tr>', Line);
          Delete(Line, 1, EndPos);
        end else
        begin
          Line := '';
        end;
      until Line = '';
      HTMLDecode(FullValue);
      SetField(fieldActors, FullValue);
    end;
  end;
  // Country
  if ImportCountry then
  begin
    LineNr := FindLine('Country:', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr + 1);
      BeginPos := pos('/">', Line) + 3;
      EndPos := pos('</a>', Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      HTMLDecode(Value);
      SetField(fieldCountry, Value);
    end;
  end;
  // Category
  if ImportCategory then
  begin
    LineNr := FindLine('Genre:', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr + 1);
      BeginPos := pos('/">', Line) + 3;
      EndPos := pos('</a>', Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      HTMLDecode(Value);
      SetField(fieldCategory, Value);
    end;
  end;
  // Description
  if ImportDescription then
  begin
    LineNr := FindLine('Plot Summary:', Page, 0);
    // FindLine reports "not found" as -1; index 0 is a valid hit.
    if LineNr < 0 then
      LineNr := FindLine('Plot Outline:', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr);
      BeginPos := pos('</b>', Line) + 5;
      EndPos := pos('<a href', Line);
      if EndPos < 1 then
      begin
        // No link on this line: the text continues on the next one.
        Line := Line + Page.GetString(LineNr+1);
        EndPos := pos('<br><br>', Line);
        if EndPos < 1 then
          EndPos := Length(Line);
      end;
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      HTMLDecode(Value);
      if UseLongestDescIMDB then
        SetField(fieldDescription, GetDescriptions(MovieURL + 'plotsummary'))
      else
        SetField(fieldDescription, Value);
    end;
  end;
  // Comments
  if ImportComments then
  begin
    LineNr := FindLine('<b>Summary:</b>', Page, 0);
    if LineNr > -1 then
    begin
      Value := '';
      repeat
        LineNr := LineNr + 1;
        // Stop at the end of the page if '</blockquote>' never shows up.
        if LineNr >= Page.Count then
          Break;
        Line := Page.GetString(LineNr);
        EndPos := Pos('</blockquote>', Line);
        if EndPos = 0 then
          EndPos := Length(Line)
        else
          EndPos := EndPos - 1;
        Value := Value + Copy(Line, 1, EndPos) + ' ';
      until Pos('</blockquote>', Line) > 0;
      HTMLDecode(Value);
      Value := StringReplace(Value, '<br>', #13#10);
      Value := StringReplace(Value, #13#10+' ', #13#10);
      SetField(fieldComments, Value);
    end;
  end;
  // Length
  if ImportLength then
  begin
    LineNr := FindLine('Runtime:', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr + 1);
      EndPos := pos(' min', Line);
      if EndPos = 0 then
        EndPos := pos(' /', Line);
      if EndPos = 0 then
        EndPos := Length(Line);
      // Skip a prefix ending in ':' when one precedes the number.
      if Pos(':', Line) < EndPos then
        BeginPos := Pos(':', Line) + 1
      else
        BeginPos := 1;
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      SetField(fieldLength, Value);
    end;
  end;
  // Language
  if ImportLanguage then
  begin
    LineNr := FindLine('Language:', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr + 1);
      BeginPos := pos('/">', Line) + 3;
      EndPos := pos('</a>', Line);
      if EndPos = 0 then
        EndPos := Length(Line);
      Value := copy(Line, BeginPos, EndPos - BeginPos);
      SetField(fieldLanguages, Value);
    end;
  end;
  // Picture
  if ImportPicture then
  begin
    LineNr := FindLine('<img alt="cover" align="left" src="http://ia.imdb.com/media/imdb/', Page, 0);
    if LineNr < 0 then
      LineNr := FindLine('<img alt="cover" align="left" src="http://posters.imdb.com/', Page, 0);
    if LineNr < 0 then
      LineNr := FindLine('<img alt="cover" align="left" src="http://images.amazon.com/', Page, 0);
    if LineNr > -1 then
    begin
      Line := Page.GetString(LineNr);
      BeginPos := pos('src="', Line) + 4;
      Delete(Line, 1, BeginPos);
      EndPos := pos('"', Line);
      Value := copy(Line, 1, EndPos - 1);
      GetPicture(Value, False); // False = do not store picture externally ; store it in the catalog file
    end;
  end;
end;
// Downloads Address and walks through every '<p class="plotpar">' paragraph.
// Each decoded paragraph is added to the pick list; the longest one is
// returned ('' when the page contains no such paragraph).
function GetDescriptions(Address: string): string;
var
  Html: TStringList;
  Row, Paragraph: string;
  Idx, FromPos, ToPos, BestLen: Integer;
  Closed: Boolean;
begin
  Result := '';
  BestLen := 0;
  Html := TStringList.Create;
  Html.Text := GetPage(Address);
  Idx := FindLine('<p class="plotpar">', Html, 0);
  while Idx > -1 do
  begin
    Paragraph := '';
    // A paragraph may span several lines; glue them together with spaces
    // until the closing tag or the end of the page is reached.
    repeat
      Row := Html.GetString(Idx);
      Idx := Idx + 1;
      FromPos := Pos('"plotpar">', Row);
      if FromPos > 0 then
        FromPos := FromPos + 10
      else
        FromPos := 1;
      ToPos := Pos('</p>', Row);
      Closed := ToPos > 0;
      if not Closed then
        ToPos := Length(Row) + 1;
      if Paragraph <> '' then
        Paragraph := Paragraph + ' ';
      Paragraph := Paragraph + Copy(Row, FromPos, ToPos - FromPos);
    until Closed or (Idx = Html.Count);
    HTMLDecode(Paragraph);
    PickListAdd(Paragraph);
    // Keep the longest paragraph seen so far.
    if Length(Paragraph) > BestLen then
    begin
      BestLen := Length(Paragraph);
      Result := Paragraph;
    end;
    Idx := FindLine('<p class="plotpar">', Html, Idx);
  end;
  Html.Free;
end;
// Scans the result list that follows line LineNr of an IMDB search page.
// Returns 'http://us.imdb.com' + the address of the first link found, or ''
// when the list holds no link.
// LineNr is advanced to the line containing '</OL>' (or to the last line of
// the page when that tag is missing).
function AddMoviesTitles(Page: TStringList; var LineNr: Integer): String;
var
  Line, MovieAddress: string;
  StartPos: Integer;
begin
  // Explicit start value: the code below tests Result before assigning it.
  Result := '';
  // Bounded scan, so a page without '</OL>' cannot index past the last line.
  while LineNr < Page.Count - 1 do
  begin
    LineNr := LineNr + 1;
    Line := Page.GetString(LineNr);
    StartPos := pos('="', Line);
    if StartPos > 0 then
    begin
      StartPos := StartPos + 2;
      MovieAddress := copy(Line, StartPos, pos('">', Line) - StartPos);
      // Only the first address is kept.
      if Length(Result) <= 0 then
        Result := 'http://us.imdb.com' + MovieAddress;
    end;
    if pos('</OL>', Line) > 0 then
      Break;
  end;
end;
// Downloads a Culturalia search-result page and looks for the entry whose
// title equals the global MovieName, then imports that entry through
// AnalyzeMoviePageCulturalia.
// Entries are read in steps of five lines starting at line 1
// ('Codigo = ' on the first line of an entry, 'Titulo = ' on the second).
// NOTE(review): when no entry matches exactly, the last entry examined is
// imported (unchanged from the previous behaviour) - confirm this fallback is wanted.
procedure AnalyzePageCulturalia(Address: string);
var
  Page: TStringList;
  LineNr: Integer;
  Code, Title, temp2: string;
  ArticleStripped, Found: Boolean;
begin
  Code := '';
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  if Pos('No se ha encontrado ningún artículo por título', Page.Text) = 0 then
  begin
    Found := False;
    LineNr := 1;
    Page.Text := StringReplace(Page.Text, '<br>', #13#10);
    while (not Found) and (LineNr + 3 < Page.Count) do
    begin
      Code := GetValueAfter(Page.GetString(LineNr), 'Codigo = ');
      Title := GetValueAfter(Page.GetString(LineNr+1), 'Titulo = ');
      // Titles are listed as 'Name, Article.'; drop the article suffix.
      ArticleStripped := False;
      temp2 := copy(Title, length(Title)-4, length(Title));
      if (temp2 = ', El.') or (temp2 = ', La.') or (temp2 = ', Un.') then
      begin
        Title := copy(Title, 1, length(Title)-5);
        ArticleStripped := True;
      end;
      temp2 := copy(Title, length(Title)-5, length(Title));
      if (temp2 = ', Los.') or (temp2 = ', Las.') or (temp2 = ', Una.') then
      begin
        Title := copy(Title, 1, length(Title)-6);
        ArticleStripped := True;
      end;
      // Stripping the article also removed the final '.', so remove it from
      // the other titles too and compare against the bare movie name.
      if not ArticleStripped then
        if copy(Title, length(Title), 1) = '.' then
          Title := copy(Title, 1, length(Title)-1);
      Found := (Title = MovieName);
      LineNr := LineNr + 5;
    end;
  end;
  // Always release the page, including when the search found nothing.
  Page.Free;
  // Open the movie page only when an entry code was actually read.
  if Code <> '' then
    AnalyzeMoviePageCulturalia(BaseURLCulturalia + '?catalogo=1&codigo=' + Code);
end;
// Downloads one Culturalia movie page and copies its data into the current
// movie. The page is split on '<br>' and read by fixed line position:
// lines 1..11 hold title, original title, year, genre, country, director,
// actors, producer, script, photography and music, each prefixed 'Name = '.
// Synopsis, URL and picture are located by searching for their prefix.
// Pages with fewer than 12 lines are ignored.
procedure AnalyzeMoviePageCulturalia(Address: string);
var
  Page: TStringList;
  Comments: string;
  strTitle: string;
  strSinopsis: string;
  Line: string;
  LineNr: Integer;
begin
  Page := TStringList.Create;
  Page.Text := StringReplace(GetPage(Address), '<br><br>', #13#10);
  Page.Text := StringReplace(Page.Text, '<br>', #13#10);
  // Lines 1..11 are read by index below, so a shorter page cannot be a movie page.
  if Page.Count >= 12 then
  begin
    strTitle := GetValueAfter(Page.GetString(1), 'Titulo = ');
    // Drop the trailing '.' that ends the title.
    if copy(strTitle, Length(strTitle), Length(strTitle)) = '.' then
    begin
      SetField(fieldTranslatedTitle, copy(strTitle, 1, Length(strTitle) -1 ));
    end else
    begin
      SetField(fieldTranslatedTitle, strTitle);
    end;
    SetField(fieldOriginalTitle, GetValueAfter(Page.GetString(2), 'Titulo original = '));
    SetField(fieldYear, GetValueAfter(Page.GetString(3), 'Año = '));
    SetField(fieldCategory, GetValueAfter(Page.GetString(4), 'Genero = '));
    SetField(fieldCountry, GetValueAfter(Page.GetString(5), 'Nacion = '));
    SetField(fieldDirector, GetValueAfter(Page.GetString(6), 'Director = '));
    SetField(fieldActors, GetValueAfter(Page.GetString(7), 'Actores = '));
    SetField(fieldProducer, GetValueAfter(Page.GetString(8), 'Productor = '));
    // Script, photography and music have no field of their own: store them as comments.
    Comments := 'Guión: ' + GetValueAfter(Page.GetString(9), 'Guion = ');
    Comments := Comments + #13#10 + 'Fotografía: ' + GetValueAfter(Page.GetString(10), 'Fotografia = ');
    Comments := Comments + #13#10 + 'Música: ' + GetValueAfter(Page.GetString(11), 'Musica = ');
    SetField(fieldComments, Comments);
    // Synopsis: everything from the 'Sinopsis = ' line up to the 'URL = ' line.
    LineNr := FindLine('Sinopsis = ', Page, 0);
    if LineNr > -1 then
    begin
      strSinopsis := GetValueAfter(Page.GetString(LineNr), 'Sinopsis = ');
      LineNr := LineNr + 1;
      // Bounded scan: stop at the end of the page if there is no 'URL = ' line.
      while LineNr < Page.Count do
      begin
        Line := Page.GetString(LineNr);
        if pos('URL = ', Line) <> 0 then
          Break;
        strSinopsis := strSinopsis + #13#10 + Line;
        LineNr := LineNr + 1;
      end;
      HTMLRemoveTags(strSinopsis);
      SetField(fieldDescription, StringReplace(StringReplace(strSinopsis, '“', '"'), '”', '"'));
    end;
    LineNr := FindLine('URL = ', Page, 0);
    if LineNr <> -1 then
      SetField(fieldURL, GetValueAfter(Page.GetString(LineNr), 'URL = '));
    LineNr := FindLine('Imagen = ', Page, 0);
    if LineNr <> -1 then
      GetPicture(GetValueAfter(Page.GetString(LineNr), 'Imagen = '), False);
  end;
  Page.Free;
end;
// Returns the part of Line that follows Identifier when Line starts with
// Identifier; returns '' in every other case.
function GetValueAfter(Line, Identifier: string): string;
var
  PrefixLen: Integer;
begin
  Result := '';
  if Pos(Identifier, Line) = 1 then
  begin
    PrefixLen := Length(Identifier);
    Result := Copy(Line, PrefixLen + 1, Length(Line));
  end;
end;
// Main block: takes the title of the current movie (translated title first,
// then original title, then a prompt), removes a leading Spanish article,
// imports the movie from Culturalia and finally completes it from IMDB using
// the original title stored by the Culturalia import.
begin
  SetArrayLength(Articles,6);
  Articles[0]:='El ';
  Articles[1]:='La ';
  Articles[2]:='Los ';
  Articles[3]:='Las ';
  Articles[4]:='Un ';
  Articles[5]:='Una ';
  if CheckVersion(3,4,0) then
  begin
    MovieName := GetField(fieldTranslatedTitle);
    if MovieName = '' then
      MovieName := GetField (fieldOriginalTitle);
    if MovieName = '' then
    begin
      // Input returns whether the user confirmed; the text comes back in MovieName.
      if not Input('Importar de Culturalia', 'Introduce el Titulo de la Pelicula:', MovieName) then
        MovieName := '';
    end;
    if MovieName <> '' then
    begin
      for Index := 0 to 5 do
      begin
        // Only an article at the very start of the title is removed, and only once.
        if Pos(Articles[Index], MovieName) = 1 then
        begin
          MovieName := copy(MovieName, length(Articles[Index]) + 1, length(MovieName));
          Break;
        end;
      end;
      AnalyzePageCulturalia(BaseURLCulturalia + '?catalogo=1&texto=' + UrlEncode(MovieName) + '&donde=3');
      // Without an original title there is nothing meaningful to search for at IMDB.
      if GetField(fieldOriginalTitle) <> '' then
        AnalyzePageIMDB('http://us.imdb.com/Tsearch?title='+UrlEncode(GetField(fieldOriginalTitle)));
    end;
  end else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.4.0)');
end.